add workflow partner1site,dev
This commit is contained in:
parent
96e496e17b
commit
a3079f4da6
|
@ -94,6 +94,26 @@ depends_on_past=False,
|
|||
retries=3,
|
||||
dag=dag)
|
||||
|
||||
# Runs the part_summary_pos_feign.py script through the shared SSH hook;
# a failed run is retried up to three times and does not depend on earlier runs.
part_summary_pos_feign = SSHOperator(
    task_id="part_summary_pos_feign",
    ssh_hook=sshHook,
    command="python3 /data/airflow/etl/API/part_summary_pos_feign.py",
    retries=3,
    depends_on_past=False,
    dag=dag,
)
|
||||
|
||||
# Runs run_psql.sh through the shared SSH hook. The command is a template:
# it receives the run date (ds_nodash) and params.my_param, which is set to
# "part_summary_pos_load" below.
part_summary_pos_load = SSHOperator(
    task_id="part_summary_pos_load",
    ssh_hook=sshHook,
    command="/data/airflow/etl/API/run_psql.sh {{ ds_nodash }} {{params.my_param }}",
    params={"my_param": "part_summary_pos_load"},
    retries=3,
    depends_on_past=False,
    dag=dag,
)
|
||||
|
||||
# Task ordering. `a >> b` returns `b`, so each chain below declares the same
# edges as the equivalent pairwise statements.
part_summary_visit_load >> partner_summary_visit_9060
part_summary_report_load >> partner_summary_report_6257 >> task_failed
# POS pipeline: fetch from the API, then load, then the shared task_failed task.
part_summary_pos_feign >> part_summary_pos_load >> task_failed
|
||||
|
|
|
@ -0,0 +1,178 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Partner1site 全接口抓取脚本
|
||||
分页结束条件:hasNext == False
|
||||
"""
|
||||
|
||||
import random
|
||||
import hmac
|
||||
import hashlib
|
||||
import base64
|
||||
import requests
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Dict, Any
|
||||
import psycopg2
|
||||
|
||||
# ======= Configuration =======
# NOTE(review): the API key pair and the database password below are
# hard-coded in source; consider loading them from the environment or a
# secrets store — confirm with whoever owns the deployment.
ACCESS_KEY = "75c4ab4d-6a67-4aed-8b1d-5bb64fd36afc"
SECRET_KEY = "117347a7dd066a50a4d2973c5f3d5ba9101094c5"

# URLs are placeholders (to be replaced externally).
# NOTE(review): every disabled entry reuses the pos_datas URL, and the scheme
# is plain http — verify both before enabling more endpoints.
BASE_URLS = {
    # "visits": "http://onesite.tek.cn/api/summary/pos_datas",      # customer visit data
    # "reports": "http://onesite.tek.cn/api/summary/pos_datas",    # report (registration) data
    "pos_datas": "http://onesite.tek.cn/api/summary/pos_datas",   # POS data
    # "customer_and_contact_datas": "http://onesite.tek.cn/api/summary/pos_datas"  # customer and contact data
}

# Keyword arguments passed to psycopg2.connect().
PG_DSN = dict(
    database="dataops_db",
    user="dbuser_dba",
    password="EmBRxnmmjnE3",
    host="124.221.232.219",
    port="5432"
)

API_ID = "f5eeff00-8454-408d-843a-d83d90f2"  # api_id supplied externally; placeholder
# =============================
|
||||
|
||||
|
||||
class Partner1SiteClient:
    """Client for the Partner1site HTTP API.

    Holds an access/secret key pair, builds signed request tokens and
    downloads every page of an endpoint listed in ``BASE_URLS``.
    """

    def __init__(self, access_key: str, secret_key: str):
        """Store the key pair used by :meth:`gen_token`."""
        self.ak = access_key
        self.sk = secret_key

    @staticmethod
    def urlsafe_b64encode(data: bytes) -> str:
        """Return *data* encoded as URL-safe base64 text (padding kept)."""
        return base64.urlsafe_b64encode(data).decode()

    def gen_token(self, expire_sec: int = 600) -> str:
        """Build an API token of the form ``<ak>:<enc_sign>:<enc_parm>``.

        ``enc_parm`` is the base64 of ``"<6-digit random>:<deadline>"`` where
        the deadline is the current UTC epoch second plus *expire_sec*.
        ``enc_sign`` is the base64 of the hex HMAC-SHA1 digest of ``enc_parm``
        keyed with the secret key.
        """
        random_num = str(random.randint(100000, 999999))
        deadline = int(datetime.now(timezone.utc).timestamp()) + expire_sec
        enc_parm = self.urlsafe_b64encode(f"{random_num}:{deadline}".encode())
        # The hex text of the digest is what gets base64-encoded, not the raw bytes.
        hex_sign = hmac.new(self.sk.encode(), enc_parm.encode(), hashlib.sha1).hexdigest()
        enc_sign = self.urlsafe_b64encode(hex_sign.encode())
        return f"{self.ak}:{enc_sign}:{enc_parm}"

    def fetch_all_pages(self, api_name: str, params: Dict[str, Any]) -> list:
        """Download every page of *api_name* and return the combined records.

        Paging starts at page 0 with a fixed page size of 50 and stops as
        soon as a response reports ``hasNext`` as false or missing.

        Args:
            api_name: key into ``BASE_URLS`` selecting the endpoint.
            params: extra query parameters; copied for every request, never mutated.

        Returns:
            The concatenated ``data.content`` lists of all pages.

        Raises:
            ValueError: *api_name* is not configured in ``BASE_URLS``.
            RuntimeError: the API answered with ``code != 100`` or a false ``success``.
            requests.HTTPError: the HTTP status indicates an error.
        """
        if api_name not in BASE_URLS:
            raise ValueError(f"未知 API 数据来源: {api_name}")

        base_url = BASE_URLS[api_name]
        all_data = []
        page_num = 0
        page_size = 50  # fixed page size

        while True:
            # A fresh token per request, so a long download cannot outlive it.
            query = dict(params)
            query.update({
                "token": self.gen_token(),
                "size": page_size,
                "page": page_num,
            })

            resp = requests.get(base_url, params=query, timeout=30)
            resp.raise_for_status()
            data_json = resp.json()

            if data_json.get("code") != 100 or not data_json.get("success", False):
                raise RuntimeError(f"{api_name} API 错误: {data_json.get('message')}")

            # `"data": null` / `"content": null` would make a plain
            # .get(key, default) return None; fall back to empty containers.
            payload = data_json.get("data") or {}
            content = payload.get("content") or []
            all_data.extend(content)
            total_elements = payload.get("totalElements")
            has_next = payload.get("hasNext", False)

            print(f"[{api_name}] 页码 {page_num} -> 本页 {len(content)} 条,累计 {len(all_data)} 条 / 总数 {total_elements}")

            if not has_next:
                break

            page_num += 1

        return all_data
|
||||
|
||||
|
||||
def save_json_to_pg(data: list, api_id: str) -> None:
    """Store *data* as one JSON row in ``data_api.api_data``.

    In a single transaction, every existing row of *api_id* is flagged
    ``is_loaded = '1'`` and a new row holding the JSON-serialised *data*
    is inserted with ``is_loaded = '0'`` and ``status = '0'``.

    Args:
        data: records to serialise; ``len(data)`` is stored as ``total_num``.
        api_id: identifier written to the ``api_id`` column.

    Raises:
        RuntimeError: connecting or writing failed; the original exception
            is attached as ``__cause__``.
    """
    print(f"[save_to_pg] API={api_id} 写入 PG,记录数={len(data)}")
    sql = """
        UPDATE data_api.api_data
        SET is_loaded = '1'
        WHERE api_id = %s;

        INSERT INTO data_api.api_data
        (id, api_id, data, total_num, is_loaded, status,
        request_tm, execute_tm, remark)
        VALUES (%s, %s, %s, %s, '0', '0',
        current_timestamp(0), current_timestamp(0), '');
    """
    row_args = (
        api_id,
        str(uuid.uuid4()),
        api_id,
        json.dumps(data, ensure_ascii=False),
        len(data),
    )
    try:
        conn = psycopg2.connect(**PG_DSN)
        try:
            # `with conn` commits on success and rolls back on error, but it
            # does NOT close the connection; the finally clause does that.
            with conn:
                with conn.cursor() as cur:
                    cur.execute(sql, row_args)
        finally:
            conn.close()
        print(f"[save_to_pg] API={api_id} 写入完成")
    except Exception as e:
        raise RuntimeError(f"PG写入错误: {e}") from e
|
||||
|
||||
|
||||
def get_previous_date(days: int = 0) -> str:
    """Return the local date *days* days before now, formatted ``YYYY-MM-DD``."""
    target = datetime.now() - timedelta(days=days)
    return f"{target:%Y-%m-%d}"
|
||||
|
||||
|
||||
def main():
    """Fetch all POS records from the API and store them in PostgreSQL.

    Only the ``pos_datas`` endpoint is active. The other three endpoints
    are disabled here and their ``BASE_URLS`` entries are commented out.
    """
    client = Partner1SiteClient(ACCESS_KEY, SECRET_KEY)

    # Disabled endpoints. Each followed the same two-step pattern as the POS
    # fetch below and also saved under API_ID:
    #   - "visits"                      customer visit data
    #       params={} pulls everything; if the API applies a default time
    #       window, use {"startInsertDate": "2000-01-01",
    #                    "endInsertDate": get_previous_date(0)}
    #   - "reports"                     report (registration) data
    #   - "customer_and_contact_datas"  customer and contact data
    # NOTE(review): re-enabling one presumably needs its own BASE_URLS entry
    # and api id — confirm before turning it back on.

    # Active: POS data, full pull. For an incremental pull pass
    # params={"startPosInsertDate": get_previous_date(0)} instead.
    pos_data = client.fetch_all_pages(
        api_name="pos_datas",
        params={},
    )
    save_json_to_pg(pos_data, API_ID)
|
||||
|
||||
|
||||
# Run the fetch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,132 @@
|
|||
/*******Main Section**************************************************************************/
-- Rebuilds data_api.part_summary_pos from the newest not-yet-loaded JSON
-- payload stored in data_api.api_data for this api_id, then flags the
-- api_data rows of that api_id as loaded.
\set ON_ERROR_STOP on
\set AUTOCOMMIT on
\timing on

-- Run delete + insert + update as ONE transaction: with plain autocommit a
-- failing insert would leave part_summary_pos empty. On error psql stops
-- (ON_ERROR_STOP) and the open transaction is rolled back when the session ends.
-- Inside a transaction CURRENT_TIMESTAMP is the transaction start time.
BEGIN;

DELETE FROM data_api.part_summary_pos;

insert into data_api.part_summary_pos (
      address
    , alias
    , ap
    , application
    , area_name
    , channel_type
    , city
    , contact_department
    , contact_email
    , contact_name
    , contact_phone
    , contact_title
    , customer_chanel_name
    , customer_name
    , distributor_name
    , distributor_name2
    , extended_price
    , id
    , industry
    , invoice_number
    , online_name
    , online_or_offline
    , pos_insert_date
    , pos_invoice_date
    , pos_update_date
    , product_ap
    , product_qty
    , province
    , sales_name
    , sales_price
    , sales_trx_currency_code
    , sub_industry
    , temp_city_name
    , transfer
    , tsm_names_by_alias
    , zip
    , etl_tx_dt
)
-- Blank or whitespace-only values become NULL; other values are kept untrimmed.
select
      case when trim(both from address)='' then null else address end address
    , case when trim(both from alias)='' then null else alias end alias
    , case when trim(both from ap)='' then null else ap end ap
    , case when trim(both from application)='' then null else application end application
    , case when trim(both from area_name)='' then null else area_name end area_name
    , case when trim(both from channel_type)='' then null else channel_type end channel_type
    , case when trim(both from city)='' then null else city end city
    , case when trim(both from contact_department)='' then null else contact_department end contact_department
    , case when trim(both from contact_email)='' then null else contact_email end contact_email
    , case when trim(both from contact_name)='' then null else contact_name end contact_name
    , case when trim(both from contact_phone)='' then null else contact_phone end contact_phone
    , case when trim(both from contact_title)='' then null else contact_title end contact_title
    , case when trim(both from customer_chanel_name)='' then null else customer_chanel_name end customer_chanel_name
    , case when trim(both from customer_name)='' then null else customer_name end customer_name
    , case when trim(both from distributor_name)='' then null else distributor_name end distributor_name
    , case when trim(both from distributor_name2)='' then null else distributor_name2 end distributor_name2
    , case when trim(both from extended_price)='' then null else extended_price end extended_price
    , case when trim(both from id)='' then null else id end id
    , case when trim(both from industry)='' then null else industry end industry
    , case when trim(both from invoice_number)='' then null else invoice_number end invoice_number
    , case when trim(both from online_name)='' then null else online_name end online_name
    , case when trim(both from online_or_offline)='' then null else online_or_offline end online_or_offline
    , case when trim(both from pos_insert_date)='' then null else pos_insert_date end pos_insert_date
    , case when trim(both from pos_invoice_date)='' then null else pos_invoice_date end pos_invoice_date
    , case when trim(both from pos_update_date)='' then null else pos_update_date end pos_update_date
    , case when trim(both from product_ap)='' then null else product_ap end product_ap
    , case when trim(both from product_qty)='' then null else product_qty end product_qty
    , case when trim(both from province)='' then null else province end province
    , case when trim(both from sales_name)='' then null else sales_name end sales_name
    , case when trim(both from sales_price)='' then null else sales_price end sales_price
    , case when trim(both from sales_trx_currency_code)='' then null else sales_trx_currency_code end sales_trx_currency_code
    , case when trim(both from sub_industry)='' then null else sub_industry end sub_industry
    , case when trim(both from temp_city_name)='' then null else temp_city_name end temp_city_name
    , case when trim(both from transfer)='' then null else transfer end transfer
    , case when trim(both from tsm_names_by_alias)='' then null else tsm_names_by_alias end tsm_names_by_alias
    , case when trim(both from zip)='' then null else zip end zip
    , etl_tx_dt
from (
    -- The JSON array is expanded once (LATERAL); every field is read from
    -- the same element instead of re-expanding the array per column.
    select
          (elem->>'address') address
        , (elem->>'alias') alias
        , (elem->>'ap') ap
        , (elem->>'application') application
        , (elem->>'areaName') area_name
        , (elem->>'channelType') channel_type
        , (elem->>'city') city
        , (elem->>'contactDepartment') contact_department
        , (elem->>'contactEmail') contact_email
        , (elem->>'contactName') contact_name
        , (elem->>'contactPhone') contact_phone
        , (elem->>'contactTitle') contact_title
        , (elem->>'customerChanelName') customer_chanel_name
        , (elem->>'customerName') customer_name
        , (elem->>'distributorName') distributor_name
        , (elem->>'distributorName2') distributor_name2
        , (elem->>'extendedPrice') extended_price
        , (elem->>'id') id
        , (elem->>'industry') industry
        , (elem->>'invoiceNumber') invoice_number
        , (elem->>'onlineName') online_name
        , (elem->>'onlineOrOffline') online_or_offline
        , (elem->>'posInsertDate') pos_insert_date
        , (elem->>'posInvoiceDate') pos_invoice_date
        , (elem->>'posUpdateDate') pos_update_date
        , (elem->>'productAp') product_ap
        , (elem->>'productQty') product_qty
        , (elem->>'province') province
        , (elem->>'salesName') sales_name
        , (elem->>'salesPrice') sales_price
        , (elem->>'salesTrxCurrencyCode') sales_trx_currency_code
        , (elem->>'subIndustry') sub_industry
        , (elem->>'tempCityName') temp_city_name
        , (elem->>'transfer') transfer
        , (elem->>'tsmNamesByAlias') tsm_names_by_alias
        , (elem->>'zip') zip
        , CURRENT_TIMESTAMP(0) etl_tx_dt
    from (select * from data_api.api_data
          WHERE api_id='f5eeff00-8454-408d-843a-d83d90f2' and is_loaded = '0'
          order by request_tm desc limit 1) src
    cross join lateral json_array_elements(src.data::json) as elem
) p;

-- Flag every api_data row of this api_id as loaded.
update data_api.api_data
set is_loaded = '1' ,
    status = '1',
    request_tm = current_timestamp(0)
where api_id='f5eeff00-8454-408d-843a-d83d90f2';

COMMIT;
\q
|
Loading…
Reference in New Issue