from __future__ import annotations from typing import List, Tuple from app.models import ( DataImportAnalysisRequest, LLMMessage, LLMProvider, LLMRole, ) def resolve_provider_from_model(llm_model: str) -> Tuple[LLMProvider, str]: """Resolve provider based on the llm_model string. The llm_model may be provided as 'provider:model' or 'provider/model'. If no provider prefix is present, try an educated guess from common model name patterns. """ normalized = llm_model.strip() provider_hint: str | None = None model_name = normalized for delimiter in (":", "/", "|"): if delimiter in normalized: provider_hint, model_name = normalized.split(delimiter, 1) provider_hint = provider_hint.strip().lower() model_name = model_name.strip() break provider_map = {provider.value: provider for provider in LLMProvider} if provider_hint: if provider_hint not in provider_map: raise ValueError( f"Unsupported provider '{provider_hint}'. Expected one of: {', '.join(provider_map.keys())}." ) return provider_map[provider_hint], model_name return _guess_provider_from_model(model_name), model_name def _guess_provider_from_model(model_name: str) -> LLMProvider: lowered = model_name.lower() if lowered.startswith(("gpt", "o1", "text-", "dall-e", "whisper")): return LLMProvider.OPENAI if lowered.startswith(("claude", "anthropic")): return LLMProvider.ANTHROPIC if lowered.startswith(("gemini", "models/gemini")): return LLMProvider.GEMINI if lowered.startswith("qwen"): return LLMProvider.QWEN if lowered.startswith("deepseek"): return LLMProvider.DEEPSEEK if lowered.startswith(("openrouter", "router-")): return LLMProvider.OPENROUTER supported = ", ".join(provider.value for provider in LLMProvider) raise ValueError( f"Unable to infer provider from model '{model_name}'. " f"Please prefix with 'provider:model'. Supported providers: {supported}." ) def build_import_messages( request: DataImportAnalysisRequest, ) -> List[LLMMessage]: """Create system and user messages for the import analysis prompt.""" headers_formatted = "\n".join(f"- {header}" for header in request.table_headers) system_prompt = ( "你是一名数据导入识别助手。请根据给定的表头和示例数据,判断字段含义、" "典型数据类型以及潜在的数据质量问题。最终请返回一个结构化的JSON。\n" "JSON结构需包含: field_summaries (数组, 每项含 header、meaning、data_type、quality_notes), " "detected_issues (字符串数组),以及 overall_suggestion (字符串)。" ) user_prompt = ( f"导入记录ID: {request.import_record_id}\n\n" "表头信息:\n" f"{headers_formatted}\n\n" "示例数据:\n" f"{request.example_data}\n\n" "请仔细分析示例数据与表头之间的对应关系,并返回符合上述JSON结构的内容。" ) return [ LLMMessage(role=LLMRole.SYSTEM, content=system_prompt), LLMMessage(role=LLMRole.USER, content=user_prompt), ]