数据导入接口增加prompt拼接内容

2025-10-29 00:53:07 +08:00
parent 59b5e9410b
commit 76b8c9d79b
3 changed files with 24 additions and 12 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
 .venv
 gx/uncommitted/
-.vscode/
+.vscode/
+__pycache__/
+app/__pycache__
+*.pyc
+.DS_Store
--- a/app/services/import_analysis.py
+++ b/app/services/import_analysis.py
@@ -1,5 +1,7 @@
 from __future__ import annotations

+from functools import lru_cache
+from pathlib import Path
 from typing import List, Tuple

 from app.models import (
@@ -69,23 +71,27 @@ def build_import_messages(
    """Create system and user messages for the import analysis prompt."""
    headers_formatted = "\n".join(f"- {header}" for header in request.table_headers)

-    system_prompt = (
-        "你是一名数据导入识别助手。请根据给定的表头和示例数据，判断字段含义、"
-        "典型数据类型以及潜在的数据质量问题。最终请返回一个结构化的JSON。\n"
-        "JSON结构需包含: field_summaries (数组, 每项含 header、meaning、data_type、quality_notes), "
-        "detected_issues (字符串数组)，以及 overall_suggestion (字符串)。"
-    )
+    system_prompt = load_import_template()

-    user_prompt = (
+    data_block = (
        f"导入记录ID: {request.import_record_id}\n\n"
        "表头信息:\n"
        f"{headers_formatted}\n\n"
        "示例数据:\n"
-        f"{request.example_data}\n\n"
-        "请仔细分析示例数据与表头之间的对应关系，并返回符合上述JSON结构的内容。"
+        f"{request.example_data}"
    )

    return [
        LLMMessage(role=LLMRole.SYSTEM, content=system_prompt),
-        LLMMessage(role=LLMRole.USER, content=user_prompt),
+        LLMMessage(role=LLMRole.USER, content=data_block),
    ]
+
+
+@lru_cache(maxsize=1)
+def load_import_template() -> str:
+    template_path = (
+        Path(__file__).resolve().parents[2] / "prompt" / "data_import_analysis.md"
+    )
+    if not template_path.exists():
+        raise FileNotFoundError(f"Prompt template not found at {template_path}")
+    return template_path.read_text(encoding="utf-8").strip()
--- a/prompt/data_import_analysis.md
+++ b/prompt/data_import_analysis.md
@@ -39,4 +39,6 @@

 若信息不足，请显式指出“信息不足”并给出补充数据需求清单。
 避免武断结论，用“可能 / 候选 / 建议”字样。
-不要捏造样本未出现的值。
+不要捏造样本未出现的值。
+
+数据块