init: LLM gateway & import_analysis

This commit is contained in:
zhaoawd
2025-10-29 00:38:57 +08:00
commit 0af5f19af9
62 changed files with 3169 additions and 0 deletions

3
app/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
from .main import create_app
__all__ = ["create_app"]

6
app/exceptions.py Normal file
View File

@@ -0,0 +1,6 @@
class ProviderConfigurationError(RuntimeError):
"""Raised when a provider is missing required configuration."""
class ProviderAPICallError(RuntimeError):
"""Raised when the upstream provider responds with an error."""

103
app/main.py Normal file
View File

@@ -0,0 +1,103 @@
from __future__ import annotations
from contextlib import asynccontextmanager
import httpx
from fastapi import Depends, FastAPI, HTTPException, Request
from app.exceptions import ProviderAPICallError, ProviderConfigurationError
from app.models import (
DataImportAnalysisRequest,
DataImportAnalysisResponse,
LLMRequest,
LLMResponse,
)
from app.services import LLMGateway
from app.services.import_analysis import build_import_messages, resolve_provider_from_model
@asynccontextmanager
async def lifespan(app: FastAPI):
client = httpx.AsyncClient(timeout=httpx.Timeout(30.0))
gateway = LLMGateway()
try:
app.state.http_client = client # type: ignore[attr-defined]
app.state.gateway = gateway # type: ignore[attr-defined]
yield
finally:
await client.aclose()
def create_app() -> FastAPI:
application = FastAPI(
title="Unified LLM Gateway",
version="0.1.0",
lifespan=lifespan,
)
@application.post(
"/v1/chat/completions",
response_model=LLMResponse,
summary="Dispatch chat completion to upstream provider",
)
async def create_chat_completion(
payload: LLMRequest,
gateway: LLMGateway = Depends(get_gateway),
client: httpx.AsyncClient = Depends(get_http_client),
) -> LLMResponse:
try:
return await gateway.chat(payload, client)
except ProviderConfigurationError as exc:
raise HTTPException(status_code=422, detail=str(exc)) from exc
except ProviderAPICallError as exc:
raise HTTPException(status_code=502, detail=str(exc)) from exc
@application.post(
"/v1/import/analyze",
response_model=DataImportAnalysisResponse,
summary="Analyze import sample data via configured LLM",
)
async def analyze_import_data(
payload: DataImportAnalysisRequest,
gateway: LLMGateway = Depends(get_gateway),
client: httpx.AsyncClient = Depends(get_http_client),
) -> DataImportAnalysisResponse:
try:
provider, model_name = resolve_provider_from_model(payload.llm_model)
except ValueError as exc:
raise HTTPException(status_code=422, detail=str(exc)) from exc
messages = build_import_messages(payload)
llm_request = LLMRequest(
provider=provider,
model=model_name,
messages=messages,
temperature=payload.temperature if payload.temperature is not None else 0.2,
max_tokens=payload.max_tokens,
)
try:
llm_response = await gateway.chat(llm_request, client)
except ProviderConfigurationError as exc:
raise HTTPException(status_code=422, detail=str(exc)) from exc
except ProviderAPICallError as exc:
raise HTTPException(status_code=502, detail=str(exc)) from exc
return DataImportAnalysisResponse(
import_record_id=payload.import_record_id,
llm_response=llm_response,
)
return application
async def get_gateway(request: Request) -> LLMGateway:
return request.app.state.gateway # type: ignore[return-value, attr-defined]
async def get_http_client(request: Request) -> httpx.AsyncClient:
return request.app.state.http_client # type: ignore[return-value, attr-defined]
app = create_app()
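
A minimal client sketch against the two routes above; the base URL, model names, and sample rows are assumptions for illustration, not part of this commit:

import asyncio

import httpx


async def demo() -> None:
    # Assumes the gateway runs locally, e.g. `uvicorn app.main:app --port 8000`.
    async with httpx.AsyncClient(base_url="http://localhost:8000", timeout=60.0) as client:
        # Generic chat completion dispatched to the named provider.
        chat = await client.post(
            "/v1/chat/completions",
            json={
                "provider": "openai",
                "model": "gpt-4o-mini",  # hypothetical model name
                "messages": [{"role": "user", "content": "Say hello."}],
            },
        )
        chat.raise_for_status()
        # Import analysis; the provider is resolved from the 'provider:model' prefix.
        analysis = await client.post(
            "/v1/import/analyze",
            json={
                "import_record_id": "demo-001",
                "example_data": "Alice,alice@example.com\nBob,bob@example.com",
                "table_headers": ["name", "email"],
                "llm_model": "openai:gpt-4o-mini",
            },
        )
        analysis.raise_for_status()
        print(chat.json()["choices"][0]["message"]["content"])
        print(analysis.json()["llm_response"]["choices"])


asyncio.run(demo())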

92
app/models.py Normal file
View File

@@ -0,0 +1,92 @@
from __future__ import annotations
from enum import Enum
from typing import Any, List, Optional
from pydantic import BaseModel, Field
class LLMRole(str, Enum):
USER = "user"
ASSISTANT = "assistant"
SYSTEM = "system"
class LLMMessage(BaseModel):
role: LLMRole = Field(..., description="Message author role.")
content: str = Field(..., description="Plain text content of the message.")
class LLMProvider(str, Enum):
OPENAI = "openai"
ANTHROPIC = "anthropic"
OPENROUTER = "openrouter"
GEMINI = "gemini"
QWEN = "qwen"
DEEPSEEK = "deepseek"
class LLMRequest(BaseModel):
provider: LLMProvider = Field(..., description="Target LLM provider identifier.")
model: str = Field(..., description="Model name understood by the provider.")
messages: List[LLMMessage] = Field(..., description="Ordered chat messages.")
temperature: Optional[float] = Field(
0.7, description="Sampling temperature when supported."
)
top_p: Optional[float] = Field(
None, description="Top-p nucleus sampling when supported."
)
max_tokens: Optional[int] = Field(
None, description="Maximum tokens to generate when supported."
)
stream: Optional[bool] = Field(
False, description="Enable provider streaming if both sides support it."
)
extra_params: Optional[dict[str, Any]] = Field(
None, description="Provider-specific parameters to merge into the payload."
)
class LLMChoice(BaseModel):
index: int
message: LLMMessage
class LLMResponse(BaseModel):
provider: LLMProvider
model: str
choices: List[LLMChoice]
raw: Optional[dict[str, Any]] = Field(
None, description="Raw provider response for debugging."
)
class DataImportAnalysisRequest(BaseModel):
import_record_id: str = Field(..., description="Unique identifier for this import run.")
example_data: str = Field(
...,
max_length=30_000,
description="Sample rows from the import payload. Limited to 30k characters.",
)
table_headers: List[str] = Field(
...,
min_length=1,
description="Ordered list of table headers associated with the data.",
)
llm_model: str = Field(
...,
description="Model identifier. Accepts 'provider:model' format or plain model name.",
)
temperature: Optional[float] = Field(
None,
description="Optional override for LLM temperature when generating recognition output.",
)
max_tokens: Optional[int] = Field(
None,
description="Optional override for maximum tokens generated during recognition.",
)
class DataImportAnalysisResponse(BaseModel):
import_record_id: str
llm_response: LLMResponse
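
A quick construction sketch for these models; the values and the extra_params key are illustrative placeholders:

from app.models import LLMMessage, LLMProvider, LLMRequest, LLMRole

# Unset optional fields keep their defaults (temperature=0.7, stream=False).
request = LLMRequest(
    provider=LLMProvider.DEEPSEEK,
    model="deepseek-chat",  # hypothetical model name
    messages=[LLMMessage(role=LLMRole.USER, content="ping")],
    extra_params={"frequency_penalty": 0.1},  # merged into the provider payload as-is
)
print(request.model_dump(exclude_none=True))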

17
app/providers/__init__.py Normal file
View File

@@ -0,0 +1,17 @@
from .anthropic import AnthropicProvider
from .base import LLMProviderClient
from .deepseek import DeepSeekProvider
from .gemini import GeminiProvider
from .openai import OpenAIProvider
from .openrouter import OpenRouterProvider
from .qwen import QwenProvider
__all__ = [
"LLMProviderClient",
"OpenAIProvider",
"AnthropicProvider",
"OpenRouterProvider",
"GeminiProvider",
"QwenProvider",
"DeepSeekProvider",
]

97
app/providers/anthropic.py Normal file
View File

@@ -0,0 +1,97 @@
from __future__ import annotations
from typing import Any, Dict, List, Tuple
import httpx
from app.exceptions import ProviderAPICallError
from app.models import (
LLMChoice,
LLMMessage,
LLMProvider,
LLMRequest,
LLMResponse,
LLMRole,
)
from app.providers.base import LLMProviderClient
class AnthropicProvider(LLMProviderClient):
name = LLMProvider.ANTHROPIC.value
api_key_env = "ANTHROPIC_API_KEY"
base_url = "https://api.anthropic.com/v1/messages"
anthropic_version = "2023-06-01"
async def chat(
self, request: LLMRequest, client: httpx.AsyncClient
) -> LLMResponse:
self.ensure_stream_supported(request.stream)
system_prompt, chat_messages = self._convert_messages(request.messages)
payload = self.merge_payload(
{
"model": request.model,
"messages": chat_messages,
"max_tokens": request.max_tokens or 1024,
"temperature": request.temperature,
"top_p": request.top_p,
},
request.extra_params,
)
if system_prompt:
payload["system"] = system_prompt
headers = {
"x-api-key": self.api_key,
"anthropic-version": self.anthropic_version,
"content-type": "application/json",
}
try:
response = await client.post(self.base_url, json=payload, headers=headers)
response.raise_for_status()
except httpx.HTTPError as exc:
raise ProviderAPICallError(f"Anthropic request failed: {exc}") from exc
data: Dict[str, Any] = response.json()
message = self._build_message(data)
return LLMResponse(
provider=LLMProvider.ANTHROPIC,
model=data.get("model", request.model),
choices=[LLMChoice(index=0, message=message)],
raw=data,
)
@staticmethod
def _convert_messages(
messages: List[LLMMessage],
) -> Tuple[str | None, List[dict[str, Any]]]:
system_parts: List[str] = []
chat_payload: List[dict[str, Any]] = []
for msg in messages:
if msg.role == LLMRole.SYSTEM:
system_parts.append(msg.content)
continue
role = "user" if msg.role == LLMRole.USER else "assistant"
chat_payload.append(
{"role": role, "content": [{"type": "text", "text": msg.content}]}
)
system_prompt = "\n\n".join(system_parts) if system_parts else None
return system_prompt, chat_payload
@staticmethod
def _build_message(data: Dict[str, Any]) -> LLMMessage:
role = data.get("role", "assistant")
content_blocks = data.get("content", [])
text_parts = [
block.get("text", "")
for block in content_blocks
if isinstance(block, dict) and block.get("type") == "text"
]
content = "\n\n".join(part for part in text_parts if part)
return LLMMessage(role=role, content=content)
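
The conversion above folds system messages into a single system prompt and wraps the remaining turns in Anthropic's content-block format; a small sketch with made-up inputs:

from app.models import LLMMessage, LLMRole
from app.providers.anthropic import AnthropicProvider

system, turns = AnthropicProvider._convert_messages(
    [
        LLMMessage(role=LLMRole.SYSTEM, content="Be terse."),
        LLMMessage(role=LLMRole.USER, content="Hi"),
    ]
)
# system == "Be terse."
# turns == [{"role": "user", "content": [{"type": "text", "text": "Hi"}]}]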

44
app/providers/base.py Normal file
View File

@@ -0,0 +1,44 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any
import httpx
from app.exceptions import ProviderConfigurationError
from app.models import LLMRequest, LLMResponse
class LLMProviderClient(ABC):
"""Base class for provider-specific chat completion clients."""
name: str
api_key_env: str | None = None
supports_stream: bool = False
def __init__(self, api_key: str | None):
if self.api_key_env and not api_key:
raise ProviderConfigurationError(
f"Provider '{self.name}' requires environment variable '{self.api_key_env}'."
)
self.api_key = api_key or ""
@abstractmethod
async def chat(
self, request: LLMRequest, client: httpx.AsyncClient
) -> LLMResponse:
"""Execute a chat completion call."""
@staticmethod
def merge_payload(base: dict[str, Any], extra: dict[str, Any] | None) -> dict[str, Any]:
"""Merge provider payload with optional extra params, ignoring None values."""
merged = {k: v for k, v in base.items() if v is not None}
if extra:
merged.update({k: v for k, v in extra.items() if v is not None})
return merged
def ensure_stream_supported(self, stream_requested: bool) -> None:
if stream_requested and not self.supports_stream:
raise ProviderConfigurationError(
f"Provider '{self.name}' does not support streaming mode."
)
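
A short illustration of merge_payload: None values are dropped on both sides and extra_params win on key collisions (values are arbitrary):

from app.providers.base import LLMProviderClient

merged = LLMProviderClient.merge_payload(
    {"model": "m", "temperature": 0.2, "top_p": None},
    {"temperature": 0.9, "seed": None},
)
assert merged == {"model": "m", "temperature": 0.9}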

66
app/providers/deepseek.py Normal file
View File

@@ -0,0 +1,66 @@
from __future__ import annotations
from typing import Any, Dict, List
import httpx
from app.exceptions import ProviderAPICallError
from app.models import LLMChoice, LLMMessage, LLMProvider, LLMRequest, LLMResponse
from app.providers.base import LLMProviderClient
class DeepSeekProvider(LLMProviderClient):
name = LLMProvider.DEEPSEEK.value
api_key_env = "DEEPSEEK_API_KEY"
supports_stream = True
base_url = "https://api.deepseek.com/v1/chat/completions"
async def chat(
self, request: LLMRequest, client: httpx.AsyncClient
) -> LLMResponse:
self.ensure_stream_supported(request.stream)
payload = self.merge_payload(
{
"model": request.model,
"messages": [msg.model_dump() for msg in request.messages],
"temperature": request.temperature,
"top_p": request.top_p,
"max_tokens": request.max_tokens,
"stream": request.stream,
},
request.extra_params,
)
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
try:
response = await client.post(self.base_url, json=payload, headers=headers)
response.raise_for_status()
except httpx.HTTPError as exc:
raise ProviderAPICallError(f"DeepSeek request failed: {exc}") from exc
data: Dict[str, Any] = response.json()
choices = self._build_choices(data.get("choices", []))
return LLMResponse(
provider=LLMProvider.DEEPSEEK,
model=data.get("model", request.model),
choices=choices,
raw=data,
)
@staticmethod
def _build_choices(choices: List[dict[str, Any]]) -> List[LLMChoice]:
built: List[LLMChoice] = []
for choice in choices:
message_data = choice.get("message") or {}
message = LLMMessage(
role=message_data.get("role", "assistant"),
content=message_data.get("content", ""),
)
built.append(LLMChoice(index=choice.get("index", len(built)), message=message))
return built

112
app/providers/gemini.py Normal file
View File

@@ -0,0 +1,112 @@
from __future__ import annotations
from typing import Any, Dict, List, Tuple
import httpx
from app.exceptions import ProviderAPICallError
from app.models import (
LLMChoice,
LLMMessage,
LLMProvider,
LLMRequest,
LLMResponse,
LLMRole,
)
from app.providers.base import LLMProviderClient
class GeminiProvider(LLMProviderClient):
name = LLMProvider.GEMINI.value
api_key_env = "GEMINI_API_KEY"
base_url = "https://generativelanguage.googleapis.com/v1beta"
async def chat(
self, request: LLMRequest, client: httpx.AsyncClient
) -> LLMResponse:
self.ensure_stream_supported(request.stream)
system_instruction, contents = self._convert_messages(request.messages)
config = {
"temperature": request.temperature,
"topP": request.top_p,
"maxOutputTokens": request.max_tokens,
}
payload: Dict[str, Any] = self.merge_payload(
{"contents": contents}, request.extra_params
)
generation_config = {k: v for k, v in config.items() if v is not None}
if generation_config:
payload["generationConfig"] = generation_config
if system_instruction:
payload["systemInstruction"] = {
"role": "system",
"parts": [{"text": system_instruction}],
}
endpoint = f"{self.base_url}/models/{request.model}:generateContent?key={self.api_key}"
headers = {"Content-Type": "application/json"}
try:
response = await client.post(endpoint, json=payload, headers=headers)
response.raise_for_status()
except httpx.HTTPError as exc:
raise ProviderAPICallError(f"Gemini request failed: {exc}") from exc
data: Dict[str, Any] = response.json()
choices = self._build_choices(data.get("candidates", []))
return LLMResponse(
provider=LLMProvider.GEMINI,
model=request.model,
choices=choices,
raw=data,
)
@staticmethod
def _convert_messages(
messages: List[LLMMessage],
) -> Tuple[str | None, List[dict[str, Any]]]:
system_parts: List[str] = []
contents: List[dict[str, Any]] = []
for msg in messages:
if msg.role == LLMRole.SYSTEM:
system_parts.append(msg.content)
continue
role = "user" if msg.role == LLMRole.USER else "model"
contents.append({"role": role, "parts": [{"text": msg.content}]})
system_instruction = "\n\n".join(system_parts) if system_parts else None
return system_instruction, contents
@staticmethod
def _build_choices(candidates: List[dict[str, Any]]) -> List[LLMChoice]:
choices: List[LLMChoice] = []
for idx, candidate in enumerate(candidates):
content = candidate.get("content", {})
parts = content.get("parts", [])
text_parts = [
part.get("text", "")
for part in parts
if isinstance(part, dict) and part.get("text")
]
text = "\n\n".join(text_parts)
choices.append(
LLMChoice(
index=candidate.get("index", idx),
message=LLMMessage(role="assistant", content=text),
)
)
if not choices:
choices.append(
LLMChoice(
index=0,
message=LLMMessage(role="assistant", content=""),
)
)
return choices

66
app/providers/openai.py Normal file
View File

@@ -0,0 +1,66 @@
from __future__ import annotations
from typing import Any, Dict, List
import httpx
from app.exceptions import ProviderAPICallError
from app.models import LLMChoice, LLMMessage, LLMProvider, LLMRequest, LLMResponse
from app.providers.base import LLMProviderClient
class OpenAIProvider(LLMProviderClient):
name = LLMProvider.OPENAI.value
api_key_env = "OPENAI_API_KEY"
supports_stream = True
base_url = "https://api.openai.com/v1/chat/completions"
async def chat(
self, request: LLMRequest, client: httpx.AsyncClient
) -> LLMResponse:
self.ensure_stream_supported(request.stream)
payload = self.merge_payload(
{
"model": request.model,
"messages": [msg.model_dump() for msg in request.messages],
"temperature": request.temperature,
"top_p": request.top_p,
"max_tokens": request.max_tokens,
"stream": request.stream,
},
request.extra_params,
)
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
try:
response = await client.post(self.base_url, json=payload, headers=headers)
response.raise_for_status()
except httpx.HTTPError as exc:
raise ProviderAPICallError(f"OpenAI request failed: {exc}") from exc
data: Dict[str, Any] = response.json()
choices = self._build_choices(data.get("choices", []))
return LLMResponse(
provider=LLMProvider.OPENAI,
model=data.get("model", request.model),
choices=choices,
raw=data,
)
@staticmethod
def _build_choices(choices: List[dict[str, Any]]) -> List[LLMChoice]:
built: List[LLMChoice] = []
for choice in choices:
message_data = choice.get("message") or {}
message = LLMMessage(
role=message_data.get("role", "assistant"), # fallback to assistant
content=message_data.get("content", ""),
)
built.append(LLMChoice(index=choice.get("index", len(built)), message=message))
return built

77
app/providers/openrouter.py Normal file
View File

@@ -0,0 +1,77 @@
from __future__ import annotations
import os
from typing import Any, Dict, List
import httpx
from app.exceptions import ProviderAPICallError
from app.models import LLMChoice, LLMMessage, LLMProvider, LLMRequest, LLMResponse
from app.providers.base import LLMProviderClient
class OpenRouterProvider(LLMProviderClient):
name = LLMProvider.OPENROUTER.value
api_key_env = "OPENROUTER_API_KEY"
supports_stream = True
base_url = "https://openrouter.ai/api/v1/chat/completions"
def __init__(self, api_key: str | None):
super().__init__(api_key)
self.site_url = os.getenv("OPENROUTER_SITE_URL")
self.app_name = os.getenv("OPENROUTER_APP_NAME")
async def chat(
self, request: LLMRequest, client: httpx.AsyncClient
) -> LLMResponse:
self.ensure_stream_supported(request.stream)
payload = self.merge_payload(
{
"model": request.model,
"messages": [msg.model_dump() for msg in request.messages],
"temperature": request.temperature,
"top_p": request.top_p,
"max_tokens": request.max_tokens,
"stream": request.stream,
},
request.extra_params,
)
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
if self.site_url:
headers["HTTP-Referer"] = self.site_url
if self.app_name:
headers["X-Title"] = self.app_name
try:
response = await client.post(self.base_url, json=payload, headers=headers)
response.raise_for_status()
except httpx.HTTPError as exc:
raise ProviderAPICallError(f"OpenRouter request failed: {exc}") from exc
data: Dict[str, Any] = response.json()
choices = self._build_choices(data.get("choices", []))
return LLMResponse(
provider=LLMProvider.OPENROUTER,
model=data.get("model", request.model),
choices=choices,
raw=data,
)
@staticmethod
def _build_choices(choices: List[dict[str, Any]]) -> List[LLMChoice]:
built: List[LLMChoice] = []
for choice in choices:
message_data = choice.get("message") or {}
message = LLMMessage(
role=message_data.get("role", "assistant"),
content=message_data.get("content", ""),
)
built.append(LLMChoice(index=choice.get("index", len(built)), message=message))
return built

87
app/providers/qwen.py Normal file
View File

@@ -0,0 +1,87 @@
from __future__ import annotations
from typing import Any, Dict, List
import httpx
from app.exceptions import ProviderAPICallError
from app.models import LLMChoice, LLMMessage, LLMProvider, LLMRequest, LLMResponse
from app.providers.base import LLMProviderClient
class QwenProvider(LLMProviderClient):
name = LLMProvider.QWEN.value
api_key_env = "QWEN_API_KEY"
base_url = (
"https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
)
async def chat(
self, request: LLMRequest, client: httpx.AsyncClient
) -> LLMResponse:
self.ensure_stream_supported(request.stream)
parameters = {
"temperature": request.temperature,
"top_p": request.top_p,
}
if request.max_tokens is not None:
parameters["max_output_tokens"] = request.max_tokens
# Strip None values from parameters
parameters = {k: v for k, v in parameters.items() if v is not None}
payload: Dict[str, Any] = {
"model": request.model,
"input": {"messages": [msg.model_dump() for msg in request.messages]},
}
if parameters:
payload["parameters"] = parameters
payload = self.merge_payload(payload, request.extra_params)
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
try:
response = await client.post(self.base_url, json=payload, headers=headers)
response.raise_for_status()
except httpx.HTTPError as exc:
raise ProviderAPICallError(f"Qwen request failed: {exc}") from exc
data: Dict[str, Any] = response.json()
choices = self._build_choices(data.get("output", {}))
return LLMResponse(
provider=LLMProvider.QWEN,
model=request.model,
choices=choices,
raw=data,
)
@staticmethod
def _build_choices(output: Dict[str, Any]) -> List[LLMChoice]:
choices_payload = output.get("choices", [])
if not choices_payload and output.get("text"):
return [
LLMChoice(
index=0,
message=LLMMessage(role="assistant", content=output["text"]),
)
]
choices: List[LLMChoice] = []
for idx, choice in enumerate(choices_payload):
message_data = choice.get("message") or {}
message = LLMMessage(
role=message_data.get("role", "assistant"),
content=message_data.get("content", ""),
)
choices.append(LLMChoice(index=choice.get("index", idx), message=message))
if not choices:
choices.append(
LLMChoice(index=0, message=LLMMessage(role="assistant", content=""))
)
return choices
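
DashScope responses may carry either an OpenAI-style choices array or a bare text field; _build_choices normalises both, sketched here with made-up payloads:

from app.providers.qwen import QwenProvider

QwenProvider._build_choices({"text": "hello"})
# -> [LLMChoice(index=0, message=LLMMessage(role="assistant", content="hello"))]

QwenProvider._build_choices(
    {"choices": [{"index": 0, "message": {"role": "assistant", "content": "hi"}}]}
)
# -> [LLMChoice(index=0, message=LLMMessage(role="assistant", content="hi"))]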

3
app/services/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
from .gateway import LLMGateway
__all__ = ["LLMGateway"]

53
app/services/gateway.py Normal file
View File

@@ -0,0 +1,53 @@
from __future__ import annotations
import os
from typing import Dict, Type
import httpx
from app.exceptions import ProviderConfigurationError
from app.models import LLMProvider, LLMRequest, LLMResponse
from app.providers import (
AnthropicProvider,
DeepSeekProvider,
GeminiProvider,
LLMProviderClient,
OpenAIProvider,
OpenRouterProvider,
QwenProvider,
)
class LLMGateway:
"""Simple registry that dispatches chat requests to provider clients."""
def __init__(self) -> None:
self._providers: Dict[LLMProvider, LLMProviderClient] = {}
self._factory: Dict[LLMProvider, Type[LLMProviderClient]] = {
LLMProvider.OPENAI: OpenAIProvider,
LLMProvider.ANTHROPIC: AnthropicProvider,
LLMProvider.OPENROUTER: OpenRouterProvider,
LLMProvider.GEMINI: GeminiProvider,
LLMProvider.QWEN: QwenProvider,
LLMProvider.DEEPSEEK: DeepSeekProvider,
}
def get_provider(self, provider: LLMProvider) -> LLMProviderClient:
if provider not in self._factory:
raise ProviderConfigurationError(f"Unsupported provider '{provider.value}'.")
if provider not in self._providers:
self._providers[provider] = self._build_provider(provider)
return self._providers[provider]
def _build_provider(self, provider: LLMProvider) -> LLMProviderClient:
provider_cls = self._factory[provider]
api_key_env = getattr(provider_cls, "api_key_env", None)
api_key = os.getenv(api_key_env) if api_key_env else None
return provider_cls(api_key)
async def chat(
self, request: LLMRequest, client: httpx.AsyncClient
) -> LLMResponse:
provider_client = self.get_provider(request.provider)
return await provider_client.chat(request, client)
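
Provider clients are instantiated lazily and cached for the lifetime of the gateway; a direct-usage sketch (assumes OPENAI_API_KEY is set; the model name is illustrative):

import asyncio

import httpx

from app.models import LLMMessage, LLMProvider, LLMRequest, LLMRole
from app.services import LLMGateway


async def demo() -> None:
    gateway = LLMGateway()
    async with httpx.AsyncClient(timeout=30.0) as client:
        request = LLMRequest(
            provider=LLMProvider.OPENAI,
            model="gpt-4o-mini",  # hypothetical model name
            messages=[LLMMessage(role=LLMRole.USER, content="ping")],
        )
        response = await gateway.chat(request, client)
        print(response.choices[0].message.content)


asyncio.run(demo())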

91
app/services/import_analysis.py Normal file
View File

@@ -0,0 +1,91 @@
from __future__ import annotations
from typing import List, Tuple
from app.models import (
DataImportAnalysisRequest,
LLMMessage,
LLMProvider,
LLMRole,
)
def resolve_provider_from_model(llm_model: str) -> Tuple[LLMProvider, str]:
"""Resolve provider based on the llm_model string.
    The llm_model may be provided as 'provider:model', 'provider/model', or 'provider|model'.
    If no provider prefix is present, the provider is inferred from common model-name patterns.
"""
normalized = llm_model.strip()
provider_hint: str | None = None
model_name = normalized
for delimiter in (":", "/", "|"):
if delimiter in normalized:
provider_hint, model_name = normalized.split(delimiter, 1)
provider_hint = provider_hint.strip().lower()
model_name = model_name.strip()
break
provider_map = {provider.value: provider for provider in LLMProvider}
if provider_hint:
if provider_hint not in provider_map:
raise ValueError(
f"Unsupported provider '{provider_hint}'. Expected one of: {', '.join(provider_map.keys())}."
)
return provider_map[provider_hint], model_name
return _guess_provider_from_model(model_name), model_name
def _guess_provider_from_model(model_name: str) -> LLMProvider:
lowered = model_name.lower()
if lowered.startswith(("gpt", "o1", "text-", "dall-e", "whisper")):
return LLMProvider.OPENAI
if lowered.startswith(("claude", "anthropic")):
return LLMProvider.ANTHROPIC
if lowered.startswith(("gemini", "models/gemini")):
return LLMProvider.GEMINI
if lowered.startswith("qwen"):
return LLMProvider.QWEN
if lowered.startswith("deepseek"):
return LLMProvider.DEEPSEEK
if lowered.startswith(("openrouter", "router-")):
return LLMProvider.OPENROUTER
supported = ", ".join(provider.value for provider in LLMProvider)
raise ValueError(
f"Unable to infer provider from model '{model_name}'. "
f"Please prefix with 'provider:model'. Supported providers: {supported}."
)
def build_import_messages(
request: DataImportAnalysisRequest,
) -> List[LLMMessage]:
"""Create system and user messages for the import analysis prompt."""
headers_formatted = "\n".join(f"- {header}" for header in request.table_headers)
    # System prompt (Chinese). English gist: "You are a data-import recognition
    # assistant. Based on the given table headers and sample data, determine each
    # field's meaning, typical data type, and potential data-quality issues, and
    # return structured JSON containing field_summaries (array of header / meaning /
    # data_type / quality_notes), detected_issues (array of strings), and
    # overall_suggestion (string)."
    system_prompt = (
        "你是一名数据导入识别助手。请根据给定的表头和示例数据,判断字段含义、"
        "典型数据类型以及潜在的数据质量问题。最终请返回一个结构化的JSON。\n"
        "JSON结构需包含: field_summaries (数组, 每项含 header、meaning、data_type、quality_notes), "
        "detected_issues (字符串数组),以及 overall_suggestion (字符串)。"
    )
    # User prompt (Chinese). English gist: "Import record ID: ... / Table headers: ... /
    # Sample data: ... Carefully analyse how the sample data maps to the headers and
    # return content matching the JSON structure above."
    user_prompt = (
        f"导入记录ID: {request.import_record_id}\n\n"
        "表头信息:\n"
        f"{headers_formatted}\n\n"
        "示例数据:\n"
        f"{request.example_data}\n\n"
        "请仔细分析示例数据与表头之间的对应关系并返回符合上述JSON结构的内容。"
    )
return [
LLMMessage(role=LLMRole.SYSTEM, content=system_prompt),
LLMMessage(role=LLMRole.USER, content=user_prompt),
]
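
Resolution behaviour at a glance (model names are illustrative):

from app.services.import_analysis import resolve_provider_from_model

resolve_provider_from_model("openai:gpt-4o-mini")      # (LLMProvider.OPENAI, "gpt-4o-mini")
resolve_provider_from_model("deepseek/deepseek-chat")  # (LLMProvider.DEEPSEEK, "deepseek-chat")
resolve_provider_from_model("claude-3-haiku")          # no prefix, inferred as ANTHROPIC
resolve_provider_from_model("mystery-model")           # raises ValueError listing supported providers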