diff --git a/.gitignore b/.gitignore index afedc66..d1f6318 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ .venv gx/uncommitted/ -.vscode/ \ No newline at end of file +.vscode/ +__pycache__/ +app/__pycache__ +*.pyc +.DS_Store \ No newline at end of file diff --git a/app/services/import_analysis.py b/app/services/import_analysis.py index 6a20a41..5c92c01 100644 --- a/app/services/import_analysis.py +++ b/app/services/import_analysis.py @@ -1,5 +1,7 @@ from __future__ import annotations +from functools import lru_cache +from pathlib import Path from typing import List, Tuple from app.models import ( @@ -69,23 +71,27 @@ def build_import_messages( """Create system and user messages for the import analysis prompt.""" headers_formatted = "\n".join(f"- {header}" for header in request.table_headers) - system_prompt = ( - "你是一名数据导入识别助手。请根据给定的表头和示例数据,判断字段含义、" - "典型数据类型以及潜在的数据质量问题。最终请返回一个结构化的JSON。\n" - "JSON结构需包含: field_summaries (数组, 每项含 header、meaning、data_type、quality_notes), " - "detected_issues (字符串数组),以及 overall_suggestion (字符串)。" - ) + system_prompt = load_import_template() - user_prompt = ( + data_block = ( f"导入记录ID: {request.import_record_id}\n\n" "表头信息:\n" f"{headers_formatted}\n\n" "示例数据:\n" - f"{request.example_data}\n\n" - "请仔细分析示例数据与表头之间的对应关系,并返回符合上述JSON结构的内容。" + f"{request.example_data}" ) return [ LLMMessage(role=LLMRole.SYSTEM, content=system_prompt), - LLMMessage(role=LLMRole.USER, content=user_prompt), + LLMMessage(role=LLMRole.USER, content=data_block), ] + + +@lru_cache(maxsize=1) +def load_import_template() -> str: + template_path = ( + Path(__file__).resolve().parents[2] / "prompt" / "data_import_analysis.md" + ) + if not template_path.exists(): + raise FileNotFoundError(f"Prompt template not found at {template_path}") + return template_path.read_text(encoding="utf-8").strip() diff --git a/prompt/data_import_analysis.md b/prompt/data_import_analysis.md index 4c42484..73d8bff 100644 --- a/prompt/data_import_analysis.md +++ b/prompt/data_import_analysis.md @@ -39,4 +39,6 @@ 若信息不足,请显式指出“信息不足”并给出补充数据需求清单。 避免武断结论,用“可能 / 候选 / 建议”字样。 -不要捏造样本未出现的值。 \ No newline at end of file +不要捏造样本未出现的值。 + +数据块 \ No newline at end of file