320 lines
12 KiB
Python
320 lines
12 KiB
Python
|
|
import requests
|
|||
|
|
import json
|
|||
|
|
import pandas as pd
|
|||
|
|
import random
|
|||
|
|
from itertools import product
|
|||
|
|
|
|||
|
|
template_list = [
|
|||
|
|
{ # 时间、客户、公司
|
|||
|
|
"keys": ['time', 'customer', 'company'],
|
|||
|
|
"sql": """
|
|||
|
|
SELECT `合同名称`, `签订日期`, `客户名称`, `所属分公司`
|
|||
|
|
FROM contracts
|
|||
|
|
WHERE `所属分公司` LIKE '%{company}%'
|
|||
|
|
AND `客户名称` LIKE '%{customer}%' OR `客商类型` LIKE '%{customer}%' OR `合同名称` LIKE '%{customer}%'
|
|||
|
|
(AND 根据{time}设置`签订时间`的筛选范围)""",
|
|||
|
|
"question": "{time}{company}承接的“客户名称”或“客商类型”或“合同名称”包含{customer}的项目有哪些?参考sql语句:[sql]",
|
|||
|
|
},
|
|||
|
|
{ # 时间、行业(包括专业)
|
|||
|
|
"keys": ['time', 'field'],
|
|||
|
|
"sql": """
|
|||
|
|
SELECT `合同名称`, `签订日期`, `聚焦行业`
|
|||
|
|
FROM contracts
|
|||
|
|
WHERE `合同名称` LIKE '%{field}%' OR `聚焦行业` LIKE '%{field}%' OR `专业` LIKE '%{field}%'
|
|||
|
|
(AND 根据{time}设置`签订时间`的筛选范围)""",
|
|||
|
|
"question": "{time}承接的“聚焦行业”或“行业”包含{field}的项目有哪些?参考sql语句:[sql]"
|
|||
|
|
},
|
|||
|
|
{ # 时间、公司、专业
|
|||
|
|
"keys": ['time', 'company', 'major'],
|
|||
|
|
"sql": """
|
|||
|
|
SELECT `合同名称`, `签订日期`, `专业`, `所属分公司`
|
|||
|
|
FROM contracts
|
|||
|
|
WHERE `合同名称` LIKE '%{major}%' OR `专业` LIKE '%{major}%'
|
|||
|
|
AND `所属分公司` LIKE '%{company}%'
|
|||
|
|
(AND 根据{time}设置`签订时间`的筛选范围)""",
|
|||
|
|
"question": "{time}{company}承接的“专业”包含{major}的项目有哪些?参考sql语句:[sql]"
|
|||
|
|
},
|
|||
|
|
{ # 时间、地点
|
|||
|
|
"keys": ['time', 'area'],
|
|||
|
|
"sql": """
|
|||
|
|
SELECT `合同名称`, `地点`
|
|||
|
|
FROM contracts
|
|||
|
|
WHERE `地点` LIKE '%{area}%'
|
|||
|
|
(AND 根据{time}设置`签订时间`的筛选范围)""",
|
|||
|
|
"question": "{time}承接的地点在“{area}”的项目有哪些?参考sql语句:[sql]",
|
|||
|
|
},
|
|||
|
|
{ # 关键字查询(专业、行业、公司、客户、地点)
|
|||
|
|
"keys": ['keyword'],
|
|||
|
|
"sql": """
|
|||
|
|
SELECT `合同名称`, `所属分公司`, `专业`, `聚焦行业`, `客商类型`, `地点`
|
|||
|
|
FROM contracts
|
|||
|
|
WHERE `合同名称` LIKE '%{keyword}%' OR `聚焦行业` LIKE '%{keyword}%'
|
|||
|
|
OR `所属分公司` LIKE '%{keyword}%' OR `客商类型` LIKE '%{keyword}%'
|
|||
|
|
OR `地点` LIKE '%{keyword}%' OR `客户名称` LIKE '%{keyword}%'
|
|||
|
|
OR `专业` LIKE '%{keyword}%'""",
|
|||
|
|
"question": "“地点”“专业”“聚焦行业”“所属分公司”“客商类型”“客户名称”或“合同名称”包含{keyword}的项目有哪些?参考sql语句:[sql]",
|
|||
|
|
},
|
|||
|
|
{ # 公司、金额
|
|||
|
|
"keys": ['company', 'money'],
|
|||
|
|
"sql": """
|
|||
|
|
SELECT `合同名称`, `所属分公司`, `合同签订金额(人民币)`
|
|||
|
|
FROM contracts
|
|||
|
|
WHERE `公司` LIKE '%{company}%'
|
|||
|
|
(AND 根据{money}筛选`合同签订金额(人民币)`的筛选范围)""",
|
|||
|
|
"question": "{company}的合同金额{money}的项目有哪些?参考sql语句:[sql]"
|
|||
|
|
}]
|
|||
|
|
|
|||
|
|
raw_data = {
|
|||
|
|
'time': list(pd.read_excel("./valueSets.xlsx", sheet_name="时间取值")["时间取值"]),
|
|||
|
|
'major': list(pd.read_excel("./valueSets.xlsx", sheet_name="专业取值")["专业取值"]),
|
|||
|
|
'company': list(pd.read_excel("./valueSets.xlsx", sheet_name="公司取值")["公司取值"]),
|
|||
|
|
'field': list(pd.read_excel("./valueSets.xlsx", sheet_name="行业取值")["行业取值"]),
|
|||
|
|
'customer': list(pd.read_excel("./valueSets.xlsx", sheet_name="客户取值")["客户取值"]),
|
|||
|
|
'area': list(pd.read_excel("./valueSets.xlsx", sheet_name="地区取值")["地区取值"]),
|
|||
|
|
'money': [ "100万以上的", "50万以上的", "2000万以上", "400万以上", "10万以上", "100万到1000万",], # 金额范围取值
|
|||
|
|
}
|
|||
|
|
raw_data['keyword'] = [item for key in ['area', 'company', 'customer', 'major', 'field'] for item in raw_data[key]]
|
|||
|
|
|
|||
|
|
def combine_key_val(keys: list, k=4) -> dict:
|
|||
|
|
"""
|
|||
|
|
计算不同字段的排列组合
|
|||
|
|
Args:
|
|||
|
|
keys (_type_): 字段名列表
|
|||
|
|
k (int, optional): 每个字段的取值数量. Defaults to 4.
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
list: [{key1: value1}]字典列表
|
|||
|
|
|
|||
|
|
"""
|
|||
|
|
tmp = {}
|
|||
|
|
for key in keys:
|
|||
|
|
value = raw_data.get(key, [])
|
|||
|
|
# if key == 'company':
|
|||
|
|
# tmp[key] = ['一分公司']
|
|||
|
|
# else:
|
|||
|
|
# k = len(value) if k > len(value) else k
|
|||
|
|
# tmp[key] = random.sample(value, k=k)
|
|||
|
|
k = len(value) if k > len(value) else k
|
|||
|
|
tmp[key] = random.sample(value, k=k)
|
|||
|
|
print(tmp)
|
|||
|
|
# 计算笛卡尔积
|
|||
|
|
keys = list(tmp.keys())
|
|||
|
|
values = list(tmp.values())
|
|||
|
|
combinations = list(product(*values))
|
|||
|
|
# 将每个组合转换为字典
|
|||
|
|
return [{key: value for key, value in zip(keys, combination)} for combination in combinations]
|
|||
|
|
|
|||
|
|
|
|||
|
|
def form_question(keys, k) -> list:
|
|||
|
|
"""根据选填字段生成问题
|
|||
|
|
|
|||
|
|
Args:
|
|||
|
|
keys (_type_): _description_
|
|||
|
|
|
|||
|
|
Returns:
|
|||
|
|
list: _description_
|
|||
|
|
"""
|
|||
|
|
combination_dict = combine_key_val(keys, k=k)
|
|||
|
|
print(len(combination_dict))
|
|||
|
|
template_idx = 0
|
|||
|
|
for index, item in enumerate(template_list):
|
|||
|
|
if set(item['keys']) == set(keys):
|
|||
|
|
template_idx = index
|
|||
|
|
break
|
|||
|
|
template = template_list[template_idx]
|
|||
|
|
question_template = template['question'].replace('[sql]', template['sql'])
|
|||
|
|
|
|||
|
|
question_list = []
|
|||
|
|
tmp = ''
|
|||
|
|
for combination in combination_dict:
|
|||
|
|
tmp = question_template
|
|||
|
|
tmp = tmp.format_map(combination)
|
|||
|
|
question_list.append(tmp)
|
|||
|
|
return question_list
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test(keys, k=3):
|
|||
|
|
question_list = form_question(keys, k=k)
|
|||
|
|
try:
|
|||
|
|
res = [["问题", "sql", "回答", "thought"]]
|
|||
|
|
for question in question_list:
|
|||
|
|
if question == "":
|
|||
|
|
break
|
|||
|
|
payload = {"return_type": "text", "question": question}
|
|||
|
|
response = requests.post(url, headers=headers, data=json.dumps(payload))
|
|||
|
|
response_data = response.json()
|
|||
|
|
print("data_res", response_data)
|
|||
|
|
res.append(
|
|||
|
|
[
|
|||
|
|
question,
|
|||
|
|
response_data["sql"],
|
|||
|
|
response_data["result"],
|
|||
|
|
response_data["thought"],
|
|||
|
|
]
|
|||
|
|
)
|
|||
|
|
data = pd.DataFrame(res)
|
|||
|
|
data.to_excel("./output/{}.xlsx".format('-'.join(keys)), index=False)
|
|||
|
|
except Exception as error:
|
|||
|
|
print("Error:", error)
|
|||
|
|
return "Error"
|
|||
|
|
|
|||
|
|
url = "http://localhost:8001/qwen/contracts?apikey=YUVietLgiGmtqzYUVIIGjrNoLMsGM0FI"
|
|||
|
|
headers = {
|
|||
|
|
"Content-Type": "application/json",
|
|||
|
|
# Uncomment the following line if needed and provide the appropriate value for data['apiKey']
|
|||
|
|
# 'Authorization': f'Bearer {data["apiKey"]}'
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
# # def some_function():
|
|||
|
|
# # print(pdata["问题"])
|
|||
|
|
|
|||
|
|
# ls = pdata["问题"]
|
|||
|
|
# qs = []
|
|||
|
|
# tmp = []
|
|||
|
|
# res_q = []
|
|||
|
|
# # for ll in ["数据库字段中的“合同名称”或“聚焦行业”或“专业”包含“key”的项目有哪些?"]:
|
|||
|
|
# # for ll in ["time公司承接的money的项目有哪些/多少?今年是2024年。"]:
|
|||
|
|
# # for ll in ["time公司承接的customer的项目有哪些/多少?其中,值为“customer”是数据库中“客商类型”字段的部分内容。今年是2024年。"]:
|
|||
|
|
# for ll in [
|
|||
|
|
# "company time承接的field的项目有哪些?其中,值为“company”是数据库中“所属分公司”字段的部分内容,“field”是数据库中“合同名称”或“聚焦行业”字段的部分内容。今年是2024年。"
|
|||
|
|
# ]:
|
|||
|
|
# # for ll in ["time公司承接的field的项目有哪些/多少?其中,值为“field”是数据库中“合同名称”或“聚焦行业”字段的部分内容。"]:
|
|||
|
|
|
|||
|
|
# qs = []
|
|||
|
|
# tmp = []
|
|||
|
|
# tmp.append(ll)
|
|||
|
|
# if "time" in ll:
|
|||
|
|
# for ts in time:
|
|||
|
|
# t0 = ll.replace("time", ts)
|
|||
|
|
# qs.append(t0)
|
|||
|
|
# tmp = qs
|
|||
|
|
# qs = []
|
|||
|
|
|
|||
|
|
# for t1 in tmp:
|
|||
|
|
# if "major" in t1:
|
|||
|
|
# for sp in major:
|
|||
|
|
# t0 = t1.replace("major", sp)
|
|||
|
|
# qs.append(t0)
|
|||
|
|
# else:
|
|||
|
|
# qs.append(t1)
|
|||
|
|
|
|||
|
|
# tmp = qs
|
|||
|
|
# qs = []
|
|||
|
|
|
|||
|
|
# for t1 in tmp:
|
|||
|
|
# if "company" in t1:
|
|||
|
|
# for sp in company:
|
|||
|
|
# t0 = t1.replace("company", sp)
|
|||
|
|
# qs.append(t0)
|
|||
|
|
# else:
|
|||
|
|
# qs.append(t1)
|
|||
|
|
|
|||
|
|
# tmp = qs
|
|||
|
|
# qs = []
|
|||
|
|
|
|||
|
|
# for t1 in tmp:
|
|||
|
|
# if "field" in t1:
|
|||
|
|
# for sp in field:
|
|||
|
|
# t0 = t1.replace("field", sp)
|
|||
|
|
# qs.append(t0)
|
|||
|
|
# else:
|
|||
|
|
# qs.append(t1)
|
|||
|
|
|
|||
|
|
# tmp = qs
|
|||
|
|
# qs = []
|
|||
|
|
|
|||
|
|
# for t1 in tmp:
|
|||
|
|
# if "key" in t1:
|
|||
|
|
# for sp in key:
|
|||
|
|
# t0 = t1.replace("key", sp)
|
|||
|
|
# qs.append(t0)
|
|||
|
|
# else:
|
|||
|
|
# qs.append(t1)
|
|||
|
|
|
|||
|
|
# tmp = qs
|
|||
|
|
# qs = []
|
|||
|
|
|
|||
|
|
# # money
|
|||
|
|
# for t1 in tmp:
|
|||
|
|
# if "money" in t1:
|
|||
|
|
# for sp in money:
|
|||
|
|
# t0 = t1.replace("money", sp)
|
|||
|
|
# qs.append(t0)
|
|||
|
|
# else:
|
|||
|
|
# qs.append(t1)
|
|||
|
|
|
|||
|
|
# tmp = qs
|
|||
|
|
# qs = []
|
|||
|
|
|
|||
|
|
# for t1 in tmp:
|
|||
|
|
# if "customer" in t1:
|
|||
|
|
# for sp in customer:
|
|||
|
|
# t0 = t1.replace("customer", sp)
|
|||
|
|
# qs.append(t0)
|
|||
|
|
# else:
|
|||
|
|
# qs.append(t1)
|
|||
|
|
|
|||
|
|
# tmp = qs
|
|||
|
|
# qs = []
|
|||
|
|
|
|||
|
|
# for t1 in tmp:
|
|||
|
|
# if "area" in t1:
|
|||
|
|
# for sp in area:
|
|||
|
|
# t0 = t1.replace("area", sp)
|
|||
|
|
# qs.append(t0)
|
|||
|
|
# else:
|
|||
|
|
# qs.append(t1)
|
|||
|
|
|
|||
|
|
# # tmp = qs
|
|||
|
|
# # qs = []
|
|||
|
|
# # print(len(qs))
|
|||
|
|
# res_q = [*res_q, *qs]
|
|||
|
|
# print(len(res_q))
|
|||
|
|
# # print(res_q)
|
|||
|
|
|
|||
|
|
# # return
|
|||
|
|
# try:
|
|||
|
|
# res = [["问题", "sql", "回答", "thought"]]
|
|||
|
|
|
|||
|
|
# for ll in res_q:
|
|||
|
|
# if ll == "":
|
|||
|
|
# break
|
|||
|
|
# payload = {"return_type": "text", "question": ll}
|
|||
|
|
# response = requests.post(url, headers=headers, data=json.dumps(payload))
|
|||
|
|
# response_data = response.json()
|
|||
|
|
# print("data_res", response_data)
|
|||
|
|
# res.append(
|
|||
|
|
# [
|
|||
|
|
# ll,
|
|||
|
|
# response_data["sql"],
|
|||
|
|
# response_data["result"],
|
|||
|
|
# response_data["thought"],
|
|||
|
|
# ]
|
|||
|
|
# )
|
|||
|
|
# data = pd.DataFrame(res)
|
|||
|
|
# data.to_excel("./行业-时间.xlsx", index=False)
|
|||
|
|
# except Exception as error:
|
|||
|
|
# print("Error:", error)
|
|||
|
|
# return "Error"
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
|
|||
|
|
keys_combination_list = [
|
|||
|
|
['time', 'customer', 'company'],
|
|||
|
|
['time', 'field'],
|
|||
|
|
['time', 'company', 'major'],
|
|||
|
|
['time', 'area'],
|
|||
|
|
['keyword'],
|
|||
|
|
['company', 'money']
|
|||
|
|
]
|
|||
|
|
# print(form_question(keys_combination_list[0]))
|
|||
|
|
# form_question(keys_combination_list[2],k=3)
|
|||
|
|
# test(keys_combination_list[0], k=3)
|
|||
|
|
# test(keys_combination_list[1], k=3)
|
|||
|
|
# test(keys_combination_list[2], k=3)
|
|||
|
|
# test(keys_combination_list[3], k=3)
|
|||
|
|
# test(keys_combination_list[4], k=20)
|
|||
|
|
test(keys_combination_list[5], k=3)
|
|||
|
|
|