From a78c8b94460e95165c0021ee2c287aa491cd0967 Mon Sep 17 00:00:00 2001 From: zhaoawd Date: Wed, 29 Oct 2025 23:43:06 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E5=92=8C?= =?UTF-8?q?=E7=BB=93=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- deepseek-result.json | 41 +++++++++++++++++++++ test/chat_completions_deepseek_example.py | 43 ++++++++++++++++++++++ test/data_import_analysis_example.py | 10 +++++- test/openrouter_chat_example.py | 44 +++++++++++++++++++++++ 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 deepseek-result.json create mode 100644 test/chat_completions_deepseek_example.py create mode 100644 test/openrouter_chat_example.py diff --git a/deepseek-result.json b/deepseek-result.json new file mode 100644 index 0000000..5996542 --- /dev/null +++ b/deepseek-result.json @@ -0,0 +1,41 @@ +{ + "provider": "deepseek", + "model": "deepseek-chat", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "```json\n{\n \"table_name\": \"national_brand_sales\",\n \"description\": \"全国品牌系统外销售数据\",\n \"columns\": [\n {\n \"original_name\": \"品牌\",\n \"standard_name\": \"brand\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(50)\",\n \"java_type\": \"string\",\n \"nullable\": true,\n \"distinct_count_sample\": 5,\n \"null_ratio_sample\": 0.4,\n \"is_enum_candidate\": false,\n \"description\": \"品牌名称\",\n \"date_format\": null\n },\n {\n \"original_name\": \"产品价类\",\n \"standard_name\": \"price_category\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(10)\",\n \"java_type\": \"string\",\n \"nullable\": false,\n \"distinct_count_sample\": 3,\n \"null_ratio_sample\": 0.0,\n \"is_enum_candidate\": true,\n \"description\": \"产品价格分类(一类/二类/三类)\",\n \"date_format\": null\n },\n {\n \"original_name\": \"是否重点品牌" + } + } + ], + "raw": { + "id": "67f3cc80-38bc-4bb7-b336-48d4886722c4", + "object": "chat.completion", + "created": 1761752207, + "model": "deepseek-chat", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "```json\n{\n \"table_name\": \"national_brand_sales\",\n \"description\": \"全国品牌系统外销售数据\",\n \"columns\": [\n {\n \"original_name\": \"品牌\",\n \"standard_name\": \"brand\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(50)\",\n \"java_type\": \"string\",\n \"nullable\": true,\n \"distinct_count_sample\": 5,\n \"null_ratio_sample\": 0.4,\n \"is_enum_candidate\": false,\n \"description\": \"品牌名称\",\n \"date_format\": null\n },\n {\n \"original_name\": \"产品价类\",\n \"standard_name\": \"price_category\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(10)\",\n \"java_type\": \"string\",\n \"nullable\": false,\n \"distinct_count_sample\": 3,\n \"null_ratio_sample\": 0.0,\n \"is_enum_candidate\": true,\n \"description\": \"产品价格分类(一类/二类/三类)\",\n \"date_format\": null\n },\n {\n \"original_name\": \"是否重点品牌" + }, + "logprobs": null, + "finish_reason": "length" + } + ], + "usage": { + "prompt_tokens": 1078, + "completion_tokens": 256, + "total_tokens": 1334, + "prompt_tokens_details": { + "cached_tokens": 1024 + }, + "prompt_cache_hit_tokens": 1024, + "prompt_cache_miss_tokens": 54 + }, + "system_fingerprint": "fp_ffc7281d48_prod0820_fp8_kvcache" + } +} \ No newline at end of file diff --git a/test/chat_completions_deepseek_example.py b/test/chat_completions_deepseek_example.py new file mode 100644 index 0000000..66101f4 --- /dev/null +++ b/test/chat_completions_deepseek_example.py @@ -0,0 +1,43 @@ +"""Demonstrates calling /v1/chat/completions with the DeepSeek provider.""" + +from __future__ import annotations + +import asyncio +import json + +import httpx +from dotenv import load_dotenv + + +load_dotenv() + +API_URL = "http://localhost:8000/v1/chat/completions" + + +async def main() -> None: + payload = { + "provider": "deepseek", + "model": "deepseek-chat", + "messages": [ + { + "role": "system", + "content": "角色:你是一名数据分析导入助手(Data Ingestion Analyst),擅长从原始数据抽取结构化元数据、推断字段类型、识别维度/事实属性,并输出导入建模建议(Table + JSON)。\n\n任务目标:对提供的数据(含表头或table schema与若干行样本数据)进行解析,生成一份导入分析与处理报告,指导如何将其导入为标准化表结构及 JSON 元数据定义,不要省略任何字段信息,全量输出。\n\n请从以下两个方向进行思考:\n\n方向 1:元数据识别与整理\n解析表明:根据表头、Origin Table Name、Orign File Name生成表名,表名需要有意义\n解析列名:生成标准化字段名(snake_case 或小驼峰),并给出原始列名与标准字段名映射。\n为每个字段写出中文/英文注释(若无法确定,给出“待确认”并附可能解释)。\n\n方向 2:字段数据类型与格式推断\n针对每列:输出推断数据类型(如 varchar(n) / int / bigint / tinyint / float / double / decimal(p,s) / date / datetime / text)。\n说明推断依据:样本值分布、长度范围、格式正则、是否存在空值、是否数值但含前导零等。\n指出数据质量初步观察:缺失率、是否有异常/离群值(简单规则即可)、是否需标准化(如去空格、去重、枚举值归一)。\n给出“建议处理动作”:如 trim、cast_float、cast_int、cast_double、cast_date、cast_time、cast_datetime,适用于将样本数据转换成数据库表字段兼容的格式。\n若为“可能是枚举”的字段,列出候选枚举值及占比。\n\n最终内容都输出为一个json对象,格式为(字段级与表级定义),字段含:\n{\n \"table_name\": \"标准化后的表名\",\n \"description\": \"表简短描述\",\n \"columns\": [{\n \"original_name\": \"原始名称\",\n \"standard_name\": \"标准化后的名称: 下划线命名,大小写字母、数字、下划线\",\n \"data_type\": \"数据类型限制为:number/string/datetime\",\n \"db_type\": \"数据库字段类型\",\n \"java_type\": \"java字段类型限制为: int/long/double/string/date\",\n \"nullable\": true/false,\n \"distinct_count_sample\": number,\n \"null_ratio_sample\": 0.x,\n \"is_enum_candidate\": true/false,\n \"description\": \"字段简短描述\",\n \"date_format\": \"转换成Date类型的pattern\"\n }]\n}\n\n约束与风格:\n\n若信息不足,请显式指出“信息不足”并给出补充数据需求清单。\n避免武断结论,用“可能 / 候选 / 建议”字样。\n不要捏造样本未出现的值。" + }, + { + "role": "user", + "content": "导入记录ID: demo-import-001\n\n表头信息:\n- 品牌\n- 产品价类\n- 是否重点品牌\n- 系统外销售量(箱)\n- 系统外销售金额(万元)\n- 同期系统外销售量(箱)\n- 同期系统外销售金额(万元)\n\n示例数据:\nCSV样本预览:\n品牌,产品价类,是否重点品牌,系统外销售量(箱),系统外销售金额(万元),同期系统外销售量(箱),同期系统外销售金额(万元)\r\n白沙,一类,重点品牌,3332.406875,64283.5593333333,3123.693375,61821.7986666667\r\nnan,二类,重点品牌,1094.4707375,3859.69366666667,869.65725,3067.00966666667\r\nnan,三类,重点品牌,3965.0457375,8388.306,4401.6714875,8802.132\r\n宝岛,一类,否,39.934375,301.617666666667,30.5975,249.399666666667\r\n长白山,一类,重点品牌,2666.53775,12360.8306666667,1916.252,9051.672\r\nnan,二类,重点品牌,2359.910025,7671.26233333333,2335.2480875,7590.791\r\nnan,三类,重点品牌,1263.293875,2826.665,1590.750875,3503.083\r\n大前门,一类,否,81.5806875,343.721333333333,114.1179875,480.809333333333\r\nnan,三类,否,226.445225,319.975666666667,254.6595125,359.894\r\n大青山,二类,否,60.73525,209.415,60.2415,207.712666666667\n\n附加结构信息:\n{\n \"source\": \"excel\",\n \"file_name\": \"全国品牌.xlsx\",\n \"sheet_name\": \"Sheet1\"\n}" + } + ], + "temperature": 0.2, + "max_tokens": 256, + } + + async with httpx.AsyncClient(timeout=httpx.Timeout(20.0)) as client: + response = await client.post(API_URL, json=payload) + response.raise_for_status() + data = response.json() + print(json.dumps(data, ensure_ascii=False, indent=2)) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/test/data_import_analysis_example.py b/test/data_import_analysis_example.py index dd894f2..973f8fb 100644 --- a/test/data_import_analysis_example.py +++ b/test/data_import_analysis_example.py @@ -7,7 +7,9 @@ from pathlib import Path import httpx import pandas as pd +from dotenv import load_dotenv +load_dotenv() API_URL = "http://localhost:8000/v1/import/analyze" CALLBACK_URL = "http://localhost:8000/__mock__/import-callback" @@ -26,9 +28,15 @@ async def main() -> None: payload = { "import_record_id": "demo-import-001", "rows": rows, - "struce": headers, + "headers": headers, + "table_schema": { + "source": "excel", + "file_name": EXCEL_PATH.name, + "sheet_name": sheet_name, + }, "llm_model": "deepseek:deepseek-chat", "temperature": 0.2, + "max_output_tokens": 256, "callback_url": CALLBACK_URL, } diff --git a/test/openrouter_chat_example.py b/test/openrouter_chat_example.py new file mode 100644 index 0000000..0f67ddf --- /dev/null +++ b/test/openrouter_chat_example.py @@ -0,0 +1,44 @@ +"""Quick demo call against the unified chat endpoint using the OpenRouter provider.""" + +from __future__ import annotations + +import asyncio + +import httpx +from dotenv import load_dotenv + +load_dotenv() + + +API_URL = "http://localhost:8000/v1/chat/completions" + + +async def main() -> None: + payload = { + "provider": "openrouter", + "model": "anthropic/claude-3.5-sonnet", + "messages": [ + { + "role": "system", + "content": "You are an API assistant that writes concise JSON only.", + }, + { + "role": "user", + "content": "Return a JSON object describing this test invocation.", + }, + ], + "temperature": 0.1, + "max_tokens": 300, + } + + async with httpx.AsyncClient(timeout=httpx.Timeout(15.0)) as client: + response = await client.post(API_URL, json=payload) + print("Status:", response.status_code) + try: + print("Body:", response.json()) + except ValueError: + print("Raw Body:", response.text) + + +if __name__ == "__main__": + asyncio.run(main())