add workflow 客户信息补全【156,dev

This commit is contained in:
root 2025-12-08 11:08:39 +08:00
parent 3e0792e49d
commit 4778c8050c
4 changed files with 1153 additions and 891 deletions

View File

@ -5,6 +5,8 @@ from psycopg2.extras import RealDictCursor
import time import time
from typing import List, Dict, Any from typing import List, Dict, Any
import logging import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
# 配置日志 # 配置日志
logging.basicConfig( logging.basicConfig(
@ -32,30 +34,41 @@ class APIClient:
Returns: Returns:
API响应数据 API响应数据
""" """
payload = json.dumps({ payload = {
"inputs": inputs, "inputs": inputs,
"response_mode": "blocking", "response_mode": "blocking",
"user": "admin" "user": "admin"
}) }
logger.info(f"调用APIpayload: {json.dumps(payload, ensure_ascii=False)}")
try: try:
logger.info("调用带inputs参数的API") logger.info("调用带inputs参数的API")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=payload, json=payload, # Using json parameter instead of data to let requests handle serialization
timeout=1200 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -63,6 +76,7 @@ class APIClient:
"error": str(e) "error": str(e)
} }
def call_api_without_inputs(self, other_params: Dict[str, Any] = None) -> Dict[str, Any]: def call_api_without_inputs(self, other_params: Dict[str, Any] = None) -> Dict[str, Any]:
"""直接调用不带inputs参数的API """直接调用不带inputs参数的API
@ -83,23 +97,33 @@ class APIClient:
payload.update(other_params) payload.update(other_params)
try: try:
logger.info("调用不带inputs参数的API") logger.info(f"调用不带inputs参数的APIpayload: {json.dumps(payload, ensure_ascii=False)}")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=json.dumps(payload), json=payload, # Using json parameter instead of data
timeout=1200 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -117,31 +141,41 @@ class APIClient:
Returns: Returns:
API响应数据 API响应数据
""" """
payload = json.dumps({ payload = {
"inputs": {}, "inputs": {},
"query" : query, "query" : query,
"response_mode": "streaming", "response_mode": "blocking", # Changed from streaming to blocking for consistency
"user": "admin" "user": "admin"
}) }
try: try:
logger.info("调用带inputs参数的API") logger.info(f"调用带query参数的APIpayload: {json.dumps(payload, ensure_ascii=False)}")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=payload, json=payload, # Using json parameter instead of data
timeout=2400 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -161,7 +195,7 @@ class APIClient:
payload = { payload = {
"inputs": {}, "inputs": {},
"query":"", "query":"",
"response_mode": "streaming", "response_mode": "blocking", # Changed from streaming to blocking for consistency
"user": "admin" "user": "admin"
} }
@ -170,23 +204,33 @@ class APIClient:
payload.update(other_params) payload.update(other_params)
try: try:
logger.info("调用不带inputs参数的API") logger.info(f"调用不带query参数的APIpayload: {json.dumps(payload, ensure_ascii=False)}")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=json.dumps(payload), json=payload, # Using json parameter instead of data
timeout=2400 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -244,10 +288,49 @@ class DatabaseManager:
except Exception as e: except Exception as e:
logger.error(f"查询数据库失败: {e}") logger.error(f"查询数据库失败: {e}")
return [] return []
def process_single_item(api_client, item_text, type, item_index, total_count):
    """Send one item to the API and return the client's result dict.

    Args:
        api_client: Object exposing ``call_api_with_inputs(inputs)`` and
            ``call_api_with_query(query)``. Both must return a dict with a
            ``success`` key (and an ``error`` key on failure).
        item_text: Payload for this item. For workflows it is stringified and
            sent as the ``companys`` input; otherwise it is sent unchanged as
            the query.
        type: ``'workflow'`` selects the inputs-style call; any other value
            selects the query-style (agent) call. The name shadows the builtin
            but is kept so existing callers keep working.
        item_index: Position of the item in the run, used only for logging.
        total_count: Total number of items in the run, used only for logging.

    Returns:
        The dict returned by the API client, or
        ``{"success": False, "error": <message>}`` if anything raised.
    """
    logger.info(f"开始处理第 {item_index}/{total_count} 个数据: {item_text}")

    try:
        if type == 'workflow':
            inputs_data = {'companys': f"{item_text}"}
            # Route the debug dump through the logger instead of print() so
            # output from concurrent workers stays in one formatted stream.
            logger.debug(f"workflow inputs: {inputs_data}")
            result = api_client.call_api_with_inputs(inputs_data)
        else:
            # Agent mode: the item text is the query itself.
            result = api_client.call_api_with_query(item_text)

        if result['success']:
            logger.info(f"数据 {item_text} 处理成功")
        else:
            logger.error(f"数据 {item_text} 处理失败: {result.get('error')}")
        return result
    except Exception as e:
        # Worker boundary: report the failure as a result instead of raising,
        # and keep the traceback in the log for diagnosis.
        logger.exception(f"处理数据 {item_text} 时发生异常: {e}")
        return {"success": False, "error": str(e)}
#DATABASE_URL=postgresql://dbuser_dba:EmBRxnmmjnE3@124.221.232.219:5432/daas_mpp #DATABASE_URL=postgresql://dbuser_dba:EmBRxnmmjnE3@124.221.232.219:5432/daas_mpp
#DATABASE_SCHEMA=p70_ai_intelligence #DATABASE_SCHEMA=p70_ai_intelligence
def main(): def main():
"""主函数""" """主函数"""
# 配置并发数
MAX_WORKERS = 3 # 可调整为5或10
# 数据库配置 # 数据库配置
db_config = { db_config = {
'host': '124.221.232.219', 'host': '124.221.232.219',
@ -260,13 +343,12 @@ def main():
# API配置 # API配置
api_config = { api_config = {
'url': 'https://tk-agent.idgvalue.com/v1/workflows/run', 'url': 'http://10.168.1.153:18000/v1/workflows/run',
'auth_token': 'Bearer app-pv9rLM4ukTRayP2YD60cS4k5' 'auth_token': 'Bearer app-JYQCHu09hlZn0b0OUVW3PRdr'
} }
api_client = APIClient(api_config['url'], api_config['auth_token']) api_client = APIClient(api_config['url'], api_config['auth_token'])
type = 'agent'
type = 'workflow' type = 'workflow'
try: try:
@ -275,7 +357,33 @@ def main():
if flag: if flag:
# 初始化 # 初始化
db_manager = DatabaseManager(db_config) db_manager = DatabaseManager(db_config)
custom_query = """ ${custom_query} """ custom_query = """
WITH numbered_names AS (
SELECT
name,
(ROW_NUMBER() OVER (ORDER BY dw_account) - 1) /5 as batch_num
FROM p30_common.v_sql_cleaned_cn_d_account_info
WHERE name NOT IN (
SELECT "search"
FROM p70_ai_intelligence.agent_account_info
WHERE "search" IS NOT NULL
)
and name NOT IN (
SELECT name
FROM p70_ai_intelligence.agent_execp_account
WHERE name IS NOT NULL
)
order by dw_account
offset 2250
limit 4
)
SELECT
STRING_AGG(name, E'\n') as batched_names
FROM numbered_names
GROUP BY batch_num
ORDER BY batch_num;
"""
try: try:
# 从数据库获取URL列表 # 从数据库获取URL列表
@ -285,37 +393,39 @@ def main():
logger.warning("未获取到URL") logger.warning("未获取到URL")
return return
# 遍历每个URL调用API # 展平所有数据项为单个列表
for i, text in enumerate(list, 1): # all_items = []
logger.info(f"处理第 {i}/{len(list)} 个数据: {text}") # for batch in list:
# items = batch.split('\n')
# all_items.extend(items)
if type is 'workflow': logger.info(f"总共需要处理 {len(list)} 个项目,使用 {MAX_WORKERS} 个并发线程")
# 方法1: 使用带inputs参数的调用
inputs_data = {
"urls": f"{text}"
}
result = api_client.call_api_with_inputs(inputs_data) # 使用线程池并发处理
success_count = 0
failed_count = 0
if result['success']: with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
logger.info(f"URL {text} 处理成功") # 提交所有任务
else: future_to_item = {
logger.error(f"URL {text} 处理失败: {result.get('error')}") executor.submit(process_single_item, api_client, item, type, i+1, len(list)): item
for i, item in enumerate(list)
}
# 收集结果
for future in as_completed(future_to_item):
item = future_to_item[future]
try:
result = future.result()
if result.get("success"):
success_count += 1
else:
failed_count += 1
except Exception as e:
logger.error(f"处理项目 {item} 时发生异常: {e}")
failed_count += 1
else: logger.info(f"处理完成。成功: {success_count}, 失败: {failed_count}")
#agent
query = text
result = api_client.call_api_with_query(query)
if result['success']:
logger.info(f"URL {text} 处理成功")
else:
logger.error(f"URL {text} 处理失败: {result.get('error')}")
# 可选:添加延迟避免请求过于频繁
if i < len(text):
time.sleep(1)
except Exception as e: except Exception as e:
logger.error(f"程序执行失败: {e}") logger.error(f"程序执行失败: {e}")
@ -325,7 +435,7 @@ def main():
else: else:
logger.info("调用不带inputs参数的API示例") logger.info("调用不带inputs参数的API示例")
if type is 'workflow': if type == 'workflow':
result2 = api_client.call_api_without_inputs() result2 = api_client.call_api_without_inputs()
if result2['success']: if result2['success']:

View File

@ -34,31 +34,41 @@ class APIClient:
Returns: Returns:
API响应数据 API响应数据
""" """
payload = json.dumps({ payload = {
"inputs": inputs, "inputs": inputs,
"response_mode": "blocking", "response_mode": "blocking",
"user": "admin" "user": "admin"
}) }
logger.info(f"调用APIpayload: {payload}") logger.info(f"调用APIpayload: {json.dumps(payload, ensure_ascii=False)}")
try: try:
logger.info("调用带inputs参数的API") logger.info("调用带inputs参数的API")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=payload, json=payload, # Using json parameter instead of data to let requests handle serialization
timeout=2400 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -87,23 +97,33 @@ class APIClient:
payload.update(other_params) payload.update(other_params)
try: try:
logger.info("调用不带inputs参数的API") logger.info(f"调用不带inputs参数的APIpayload: {json.dumps(payload, ensure_ascii=False)}")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=json.dumps(payload), json=payload, # Using json parameter instead of data
timeout=1200 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -121,31 +141,41 @@ class APIClient:
Returns: Returns:
API响应数据 API响应数据
""" """
payload = json.dumps({ payload = {
"inputs": {}, "inputs": {},
"query" : query, "query" : query,
"response_mode": "streaming", "response_mode": "blocking", # Changed from streaming to blocking for consistency
"user": "admin" "user": "admin"
}) }
try: try:
logger.info("调用带inputs参数的API") logger.info(f"调用带query参数的APIpayload: {json.dumps(payload, ensure_ascii=False)}")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=payload, json=payload, # Using json parameter instead of data
timeout=2400 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -165,7 +195,7 @@ class APIClient:
payload = { payload = {
"inputs": {}, "inputs": {},
"query":"", "query":"",
"response_mode": "streaming", "response_mode": "blocking", # Changed from streaming to blocking for consistency
"user": "admin" "user": "admin"
} }
@ -174,23 +204,33 @@ class APIClient:
payload.update(other_params) payload.update(other_params)
try: try:
logger.info("调用不带inputs参数的API") logger.info(f"调用不带query参数的APIpayload: {json.dumps(payload, ensure_ascii=False)}")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=json.dumps(payload), json=payload, # Using json parameter instead of data
timeout=2400 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -259,6 +299,7 @@ def process_single_item(api_client, item_text, type, item_index, total_count):
inputs_data = { inputs_data = {
'companys': f"{item_text}" 'companys': f"{item_text}"
} }
print(inputs_data)
result = api_client.call_api_with_inputs(inputs_data) result = api_client.call_api_with_inputs(inputs_data)
@ -288,7 +329,7 @@ def process_single_item(api_client, item_text, type, item_index, total_count):
def main(): def main():
"""主函数""" """主函数"""
# 配置并发数 # 配置并发数
MAX_WORKERS = 10 # 可调整为5或10 MAX_WORKERS = 3 # 可调整为5或10
# 数据库配置 # 数据库配置
db_config = { db_config = {
@ -308,7 +349,6 @@ def main():
api_client = APIClient(api_config['url'], api_config['auth_token']) api_client = APIClient(api_config['url'], api_config['auth_token'])
type = 'agent'
type = 'workflow' type = 'workflow'
try: try:
@ -321,7 +361,7 @@ def main():
WITH numbered_names AS ( WITH numbered_names AS (
SELECT SELECT
name, name,
(ROW_NUMBER() OVER (ORDER BY dw_account) - 1) /10 as batch_num (ROW_NUMBER() OVER (ORDER BY dw_account) - 1) /5 as batch_num
FROM p30_common.v_sql_cleaned_cn_d_account_info FROM p30_common.v_sql_cleaned_cn_d_account_info
WHERE name NOT IN ( WHERE name NOT IN (
SELECT "search" SELECT "search"
@ -333,7 +373,9 @@ WITH numbered_names AS (
FROM p70_ai_intelligence.agent_execp_account FROM p70_ai_intelligence.agent_execp_account
WHERE name IS NOT NULL WHERE name IS NOT NULL
) )
limit 100 order by dw_account
offset 750
limit 5
) )
SELECT SELECT
STRING_AGG(name, E'\n') as batched_names STRING_AGG(name, E'\n') as batched_names

View File

@ -5,6 +5,8 @@ from psycopg2.extras import RealDictCursor
import time import time
from typing import List, Dict, Any from typing import List, Dict, Any
import logging import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
# 配置日志 # 配置日志
logging.basicConfig( logging.basicConfig(
@ -32,30 +34,41 @@ class APIClient:
Returns: Returns:
API响应数据 API响应数据
""" """
payload = json.dumps({ payload = {
"inputs": inputs, "inputs": inputs,
"response_mode": "blocking", "response_mode": "blocking",
"user": "admin" "user": "admin"
}) }
logger.info(f"调用APIpayload: {json.dumps(payload, ensure_ascii=False)}")
try: try:
logger.info("调用带inputs参数的API") logger.info("调用带inputs参数的API")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=payload, json=payload, # Using json parameter instead of data to let requests handle serialization
timeout=1200 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -63,6 +76,7 @@ class APIClient:
"error": str(e) "error": str(e)
} }
def call_api_without_inputs(self, other_params: Dict[str, Any] = None) -> Dict[str, Any]: def call_api_without_inputs(self, other_params: Dict[str, Any] = None) -> Dict[str, Any]:
"""直接调用不带inputs参数的API """直接调用不带inputs参数的API
@ -83,23 +97,33 @@ class APIClient:
payload.update(other_params) payload.update(other_params)
try: try:
logger.info("调用不带inputs参数的API") logger.info(f"调用不带inputs参数的APIpayload: {json.dumps(payload, ensure_ascii=False)}")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=json.dumps(payload), json=payload, # Using json parameter instead of data
timeout=1200 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -117,31 +141,41 @@ class APIClient:
Returns: Returns:
API响应数据 API响应数据
""" """
payload = json.dumps({ payload = {
"inputs": {}, "inputs": {},
"query" : query, "query" : query,
"response_mode": "streaming", "response_mode": "blocking", # Changed from streaming to blocking for consistency
"user": "admin" "user": "admin"
}) }
try: try:
logger.info("调用带inputs参数的API") logger.info(f"调用带query参数的APIpayload: {json.dumps(payload, ensure_ascii=False)}")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=payload, json=payload, # Using json parameter instead of data
timeout=2400 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -161,7 +195,7 @@ class APIClient:
payload = { payload = {
"inputs": {}, "inputs": {},
"query":"", "query":"",
"response_mode": "streaming", "response_mode": "blocking", # Changed from streaming to blocking for consistency
"user": "admin" "user": "admin"
} }
@ -170,23 +204,33 @@ class APIClient:
payload.update(other_params) payload.update(other_params)
try: try:
logger.info("调用不带inputs参数的API") logger.info(f"调用不带query参数的APIpayload: {json.dumps(payload, ensure_ascii=False)}")
response = requests.post( response = requests.post(
self.api_url, self.api_url,
headers=self.headers, headers=self.headers,
data=json.dumps(payload), json=payload, # Using json parameter instead of data
timeout=2400 timeout=300 # Reduced timeout to more reasonable value
) )
logger.info(f"API调用完成状态码: {response.status_code}")
logger.info(f"Response content: {response.text[:500]}") # Log first 500 chars of response
response.raise_for_status() response.raise_for_status()
logger.info(f"API调用成功状态码: {response.status_code}") logger.info(f"API调用成功状态码: {response.status_code}")
return { return {
"success": True, "success": True,
"status_code": response.status_code, "status_code": response.status_code,
"data": response.json() "data": response.json() if response.content else {}
} }
except requests.exceptions.HTTPError as e:
logger.error(f"HTTP错误: {e.response.status_code} - {e.response.text}")
return {
"success": False,
"error": f"HTTP {e.response.status_code}: {e.response.text}",
"status_code": e.response.status_code
}
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error(f"API调用失败: {e}") logger.error(f"API调用失败: {e}")
return { return {
@ -244,10 +288,49 @@ class DatabaseManager:
except Exception as e: except Exception as e:
logger.error(f"查询数据库失败: {e}") logger.error(f"查询数据库失败: {e}")
return [] return []
def process_single_item(api_client, item_text, type, item_index, total_count):
    """Send one item to the API and return the client's result dict.

    Args:
        api_client: Object exposing ``call_api_with_inputs(inputs)`` and
            ``call_api_with_query(query)``. Both must return a dict with a
            ``success`` key (and an ``error`` key on failure).
        item_text: Payload for this item. For workflows it is stringified and
            sent as the ``companys`` input; otherwise it is sent unchanged as
            the query.
        type: ``'workflow'`` selects the inputs-style call; any other value
            selects the query-style (agent) call. The name shadows the builtin
            but is kept so existing callers keep working.
        item_index: Position of the item in the run, used only for logging.
        total_count: Total number of items in the run, used only for logging.

    Returns:
        The dict returned by the API client, or
        ``{"success": False, "error": <message>}`` if anything raised.
    """
    logger.info(f"开始处理第 {item_index}/{total_count} 个数据: {item_text}")

    try:
        if type == 'workflow':
            inputs_data = {'companys': f"{item_text}"}
            # Route the debug dump through the logger instead of print() so
            # output from concurrent workers stays in one formatted stream.
            logger.debug(f"workflow inputs: {inputs_data}")
            result = api_client.call_api_with_inputs(inputs_data)
        else:
            # Agent mode: the item text is the query itself.
            result = api_client.call_api_with_query(item_text)

        if result['success']:
            logger.info(f"数据 {item_text} 处理成功")
        else:
            logger.error(f"数据 {item_text} 处理失败: {result.get('error')}")
        return result
    except Exception as e:
        # Worker boundary: report the failure as a result instead of raising,
        # and keep the traceback in the log for diagnosis.
        logger.exception(f"处理数据 {item_text} 时发生异常: {e}")
        return {"success": False, "error": str(e)}
#DATABASE_URL=postgresql://dbuser_dba:EmBRxnmmjnE3@124.221.232.219:5432/daas_mpp #DATABASE_URL=postgresql://dbuser_dba:EmBRxnmmjnE3@124.221.232.219:5432/daas_mpp
#DATABASE_SCHEMA=p70_ai_intelligence #DATABASE_SCHEMA=p70_ai_intelligence
def main(): def main():
"""主函数""" """主函数"""
# 配置并发数
MAX_WORKERS = 3 # 可调整为5或10
# 数据库配置 # 数据库配置
db_config = { db_config = {
'host': '124.221.232.219', 'host': '124.221.232.219',
@ -260,13 +343,12 @@ def main():
# API配置 # API配置
api_config = { api_config = {
'url': 'https://tk-agent.idgvalue.com/v1/workflows/run', 'url': 'http://10.168.1.162:18000/v1/workflows/run',
'auth_token': 'Bearer app-pv9rLM4ukTRayP2YD60cS4k5' 'auth_token': 'Bearer app-C0iPJse2Iutj7D6sngAv2eUv'
} }
api_client = APIClient(api_config['url'], api_config['auth_token']) api_client = APIClient(api_config['url'], api_config['auth_token'])
type = 'agent'
type = 'workflow' type = 'workflow'
try: try:
@ -275,7 +357,33 @@ def main():
if flag: if flag:
# 初始化 # 初始化
db_manager = DatabaseManager(db_config) db_manager = DatabaseManager(db_config)
custom_query = """ ${custom_query} """ custom_query = """
WITH numbered_names AS (
SELECT
name,
(ROW_NUMBER() OVER (ORDER BY dw_account) - 1) /5 as batch_num
FROM p30_common.v_sql_cleaned_cn_d_account_info
WHERE name NOT IN (
SELECT "search"
FROM p70_ai_intelligence.agent_account_info
WHERE "search" IS NOT NULL
)
and name NOT IN (
SELECT name
FROM p70_ai_intelligence.agent_execp_account
WHERE name IS NOT NULL
)
order by dw_account
offset 1500
limit 5
)
SELECT
STRING_AGG(name, E'\n') as batched_names
FROM numbered_names
GROUP BY batch_num
ORDER BY batch_num;
"""
try: try:
# 从数据库获取URL列表 # 从数据库获取URL列表
@ -285,37 +393,39 @@ def main():
logger.warning("未获取到URL") logger.warning("未获取到URL")
return return
# 遍历每个URL调用API # 展平所有数据项为单个列表
for i, text in enumerate(list, 1): # all_items = []
logger.info(f"处理第 {i}/{len(list)} 个数据: {text}") # for batch in list:
# items = batch.split('\n')
# all_items.extend(items)
if type is 'workflow': logger.info(f"总共需要处理 {len(list)} 个项目,使用 {MAX_WORKERS} 个并发线程")
# 方法1: 使用带inputs参数的调用
inputs_data = {
"urls": f"{text}"
}
result = api_client.call_api_with_inputs(inputs_data) # 使用线程池并发处理
success_count = 0
failed_count = 0
if result['success']: with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
logger.info(f"URL {text} 处理成功") # 提交所有任务
else: future_to_item = {
logger.error(f"URL {text} 处理失败: {result.get('error')}") executor.submit(process_single_item, api_client, item, type, i+1, len(list)): item
for i, item in enumerate(list)
}
# 收集结果
for future in as_completed(future_to_item):
item = future_to_item[future]
try:
result = future.result()
if result.get("success"):
success_count += 1
else:
failed_count += 1
except Exception as e:
logger.error(f"处理项目 {item} 时发生异常: {e}")
failed_count += 1
else: logger.info(f"处理完成。成功: {success_count}, 失败: {failed_count}")
#agent
query = text
result = api_client.call_api_with_query(query)
if result['success']:
logger.info(f"URL {text} 处理成功")
else:
logger.error(f"URL {text} 处理失败: {result.get('error')}")
# 可选:添加延迟避免请求过于频繁
if i < len(text):
time.sleep(1)
except Exception as e: except Exception as e:
logger.error(f"程序执行失败: {e}") logger.error(f"程序执行失败: {e}")
@ -325,7 +435,7 @@ def main():
else: else:
logger.info("调用不带inputs参数的API示例") logger.info("调用不带inputs参数的API示例")
if type is 'workflow': if type == 'workflow':
result2 = api_client.call_api_without_inputs() result2 = api_client.call_api_without_inputs()
if result2['success']: if result2['success']:

View File

@ -367,14 +367,14 @@ WITH numbered_names AS (
SELECT "search" SELECT "search"
FROM p70_ai_intelligence.agent_account_info FROM p70_ai_intelligence.agent_account_info
WHERE "search" IS NOT NULL WHERE "search" IS NOT NULL
limit 100
) )
and name NOT IN ( and name NOT IN (
SELECT name SELECT name
FROM p70_ai_intelligence.agent_execp_account FROM p70_ai_intelligence.agent_execp_account
WHERE name IS NOT NULL WHERE name IS NOT NULL
) )
limit 200 order by dw_account
limit 5
) )
SELECT SELECT
STRING_AGG(name, E'\n') as batched_names STRING_AGG(name, E'\n') as batched_names