Files
data-ge/app/services/import_analysis.py
2025-10-29 00:38:57 +08:00

92 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
from typing import List, Tuple
from app.models import (
DataImportAnalysisRequest,
LLMMessage,
LLMProvider,
LLMRole,
)
def resolve_provider_from_model(llm_model: str) -> Tuple[LLMProvider, str]:
"""Resolve provider based on the llm_model string.
The llm_model may be provided as 'provider:model' or 'provider/model'.
If no provider prefix is present, try an educated guess from common model name patterns.
"""
normalized = llm_model.strip()
provider_hint: str | None = None
model_name = normalized
for delimiter in (":", "/", "|"):
if delimiter in normalized:
provider_hint, model_name = normalized.split(delimiter, 1)
provider_hint = provider_hint.strip().lower()
model_name = model_name.strip()
break
provider_map = {provider.value: provider for provider in LLMProvider}
if provider_hint:
if provider_hint not in provider_map:
raise ValueError(
f"Unsupported provider '{provider_hint}'. Expected one of: {', '.join(provider_map.keys())}."
)
return provider_map[provider_hint], model_name
return _guess_provider_from_model(model_name), model_name
def _guess_provider_from_model(model_name: str) -> LLMProvider:
lowered = model_name.lower()
if lowered.startswith(("gpt", "o1", "text-", "dall-e", "whisper")):
return LLMProvider.OPENAI
if lowered.startswith(("claude", "anthropic")):
return LLMProvider.ANTHROPIC
if lowered.startswith(("gemini", "models/gemini")):
return LLMProvider.GEMINI
if lowered.startswith("qwen"):
return LLMProvider.QWEN
if lowered.startswith("deepseek"):
return LLMProvider.DEEPSEEK
if lowered.startswith(("openrouter", "router-")):
return LLMProvider.OPENROUTER
supported = ", ".join(provider.value for provider in LLMProvider)
raise ValueError(
f"Unable to infer provider from model '{model_name}'. "
f"Please prefix with 'provider:model'. Supported providers: {supported}."
)
def build_import_messages(
request: DataImportAnalysisRequest,
) -> List[LLMMessage]:
"""Create system and user messages for the import analysis prompt."""
headers_formatted = "\n".join(f"- {header}" for header in request.table_headers)
system_prompt = (
"你是一名数据导入识别助手。请根据给定的表头和示例数据,判断字段含义、"
"典型数据类型以及潜在的数据质量问题。最终请返回一个结构化的JSON。\n"
"JSON结构需包含: field_summaries (数组, 每项含 header、meaning、data_type、quality_notes), "
"detected_issues (字符串数组),以及 overall_suggestion (字符串)。"
)
user_prompt = (
f"导入记录ID: {request.import_record_id}\n\n"
"表头信息:\n"
f"{headers_formatted}\n\n"
"示例数据:\n"
f"{request.example_data}\n\n"
"请仔细分析示例数据与表头之间的对应关系并返回符合上述JSON结构的内容。"
)
return [
LLMMessage(role=LLMRole.SYSTEM, content=system_prompt),
LLMMessage(role=LLMRole.USER, content=user_prompt),
]