数据导入接口增加prompt拼接内容
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,3 +1,7 @@
|
|||||||
.venv
|
.venv
|
||||||
gx/uncommitted/
|
gx/uncommitted/
|
||||||
.vscode/
|
.vscode/
|
||||||
|
__pycache__/
|
||||||
|
app/__pycache__
|
||||||
|
*.pyc
|
||||||
|
.DS_Store
|
||||||
@ -1,5 +1,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from functools import lru_cache
|
||||||
|
from pathlib import Path
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
from app.models import (
|
from app.models import (
|
||||||
@ -69,23 +71,27 @@ def build_import_messages(
|
|||||||
"""Create system and user messages for the import analysis prompt."""
|
"""Create system and user messages for the import analysis prompt."""
|
||||||
headers_formatted = "\n".join(f"- {header}" for header in request.table_headers)
|
headers_formatted = "\n".join(f"- {header}" for header in request.table_headers)
|
||||||
|
|
||||||
system_prompt = (
|
system_prompt = load_import_template()
|
||||||
"你是一名数据导入识别助手。请根据给定的表头和示例数据,判断字段含义、"
|
|
||||||
"典型数据类型以及潜在的数据质量问题。最终请返回一个结构化的JSON。\n"
|
|
||||||
"JSON结构需包含: field_summaries (数组, 每项含 header、meaning、data_type、quality_notes), "
|
|
||||||
"detected_issues (字符串数组),以及 overall_suggestion (字符串)。"
|
|
||||||
)
|
|
||||||
|
|
||||||
user_prompt = (
|
data_block = (
|
||||||
f"导入记录ID: {request.import_record_id}\n\n"
|
f"导入记录ID: {request.import_record_id}\n\n"
|
||||||
"表头信息:\n"
|
"表头信息:\n"
|
||||||
f"{headers_formatted}\n\n"
|
f"{headers_formatted}\n\n"
|
||||||
"示例数据:\n"
|
"示例数据:\n"
|
||||||
f"{request.example_data}\n\n"
|
f"{request.example_data}"
|
||||||
"请仔细分析示例数据与表头之间的对应关系,并返回符合上述JSON结构的内容。"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return [
|
return [
|
||||||
LLMMessage(role=LLMRole.SYSTEM, content=system_prompt),
|
LLMMessage(role=LLMRole.SYSTEM, content=system_prompt),
|
||||||
LLMMessage(role=LLMRole.USER, content=user_prompt),
|
LLMMessage(role=LLMRole.USER, content=data_block),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
def load_import_template() -> str:
    """Return the text of the data-import analysis prompt template.

    The template is read from ``prompt/data_import_analysis.md``, resolved
    against the third ancestor directory of this module's resolved path.
    Leading and trailing whitespace is stripped from the file content.
    The result is memoized by ``lru_cache``, so after one successful call
    the file is not read again within the same process.

    Raises:
        FileNotFoundError: If nothing exists at the computed template path.
    """
    base_dir = Path(__file__).resolve().parents[2]
    prompt_file = base_dir / "prompt" / "data_import_analysis.md"

    if prompt_file.exists():
        raw_text = prompt_file.read_text(encoding="utf-8")
        return raw_text.strip()

    raise FileNotFoundError(f"Prompt template not found at {prompt_file}")
|
||||||
|
|||||||
@ -39,4 +39,6 @@
|
|||||||
|
|
||||||
若信息不足,请显式指出“信息不足”并给出补充数据需求清单。
|
若信息不足,请显式指出“信息不足”并给出补充数据需求清单。
|
||||||
避免武断结论,用“可能 / 候选 / 建议”字样。
|
避免武断结论,用“可能 / 候选 / 建议”字样。
|
||||||
不要捏造样本未出现的值。
|
不要捏造样本未出现的值。
|
||||||
|
|
||||||
|
数据块
|
||||||
Reference in New Issue
Block a user