# data-ge/app/models.py
from __future__ import annotations

from enum import Enum
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, Field, HttpUrl

from app.settings import DEFAULT_IMPORT_MODEL


class LLMRole(str, Enum):
    """Author role of a chat message."""

    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"


class LLMMessage(BaseModel):
    """A single plain-text chat message."""

    role: LLMRole = Field(..., description="Message author role.")
    content: str = Field(..., description="Plain text content of the message.")


class LLMProvider(str, Enum):
    """Identifiers for the supported LLM providers."""

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    OPENROUTER = "openrouter"
    GEMINI = "gemini"
    QWEN = "qwen"
    DEEPSEEK = "deepseek"


class LLMRequest(BaseModel):
    """Provider-agnostic chat completion request."""

    provider: LLMProvider = Field(..., description="Target LLM provider identifier.")
    model: str = Field(..., description="Model name understood by the provider.")
    messages: List[LLMMessage] = Field(..., description="Ordered chat messages.")
    temperature: Optional[float] = Field(
        0.7, description="Sampling temperature when supported."
    )
    top_p: Optional[float] = Field(
        None, description="Top-p nucleus sampling when supported."
    )
    max_tokens: Optional[int] = Field(
        None, description="Maximum tokens to generate when supported."
    )
    stream: Optional[bool] = Field(
        False, description="Enable provider streaming if both sides support it."
    )
    extra_params: Optional[Dict[str, Any]] = Field(
        None, description="Provider-specific parameters to merge into the payload."
    )
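

# A minimal usage sketch (illustrative only; the model name is an assumption,
# not a project default):
#
#     request = LLMRequest(
#         provider=LLMProvider.OPENAI,
#         model="gpt-4o-mini",  # hypothetical model name
#         messages=[LLMMessage(role=LLMRole.USER, content="Hello")],
#         max_tokens=256,
#     )
#     payload = request.model_dump(exclude_none=True)  # Pydantic v2 API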


class LLMChoice(BaseModel):
    """One completion choice returned by a provider."""

    index: int
    message: LLMMessage


class LLMResponse(BaseModel):
    """Normalised response from an LLM provider."""

    provider: LLMProvider
    model: str
    choices: List[LLMChoice]
    raw: Optional[Dict[str, Any]] = Field(
        None, description="Raw provider response for debugging."
    )
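

# Illustrative: extracting the first completion's text from a response.
#
#     text = response.choices[0].message.content if response.choices else ""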


class DataImportAnalysisRequest(BaseModel):
    """Request to analyse a sample of imported data."""

    import_record_id: str = Field(..., description="Unique identifier for this import run.")
    example_data: str = Field(
        ...,
        max_length=30_000,
        description="Sample rows from the import payload. Limited to 30k characters.",
    )
    table_headers: List[str] = Field(
        ...,
        min_length=1,
        description="Ordered list of table headers associated with the data.",
    )
    llm_model: str = Field(
        ...,
        description="Model identifier. Accepts 'provider:model' format or a plain model name.",
    )
    temperature: Optional[float] = Field(
        None,
        description="Optional override for LLM temperature when generating recognition output.",
    )
    max_tokens: Optional[int] = Field(
        None,
        description="Optional override for maximum tokens generated during recognition.",
    )
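

# Illustrative request showing the 'provider:model' convention; all values
# below are hypothetical, not fixtures from the project:
#
#     DataImportAnalysisRequest(
#         import_record_id="imp-0001",
#         example_data="name,age\nAda,36",
#         table_headers=["name", "age"],
#         llm_model="openai:gpt-4o-mini",  # or just a plain model name
#     )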


class DataImportAnalysisResponse(BaseModel):
    """Analysis result paired with its import record."""

    import_record_id: str
    llm_response: LLMResponse


class DataImportAnalysisJobRequest(BaseModel):
    """Background analysis job; the result is delivered to callback_url."""

    import_record_id: str = Field(
        ..., description="Unique identifier for this import request run."
    )
    rows: List[Union[Dict[str, Any], List[Any]]] = Field(
        ...,
        description="Sample rows from the import payload. Accepts a list of dicts or a list of lists.",
    )
    headers: Optional[List[str]] = Field(
        None,
        description="Ordered list of table headers associated with the data sample.",
    )
    raw_csv: Optional[str] = Field(
        None,
        description="Optional raw CSV representation of the sample rows, if already prepared.",
    )
    table_schema: Optional[Any] = Field(
        None,
        description="Optional schema description for the table. Can be a string or a JSON-serialisable structure.",
    )
    callback_url: HttpUrl = Field(
        ...,
        description="URL to notify when the analysis completes. Receives a JSON payload with status/results.",
    )
    llm_model: str = Field(
        DEFAULT_IMPORT_MODEL,
        description="Target LLM model identifier. Defaults to DEFAULT_IMPORT_MODEL.",
    )
    temperature: Optional[float] = Field(
        None,
        description="Optional override for model temperature when generating analysis output.",
    )
    max_output_tokens: Optional[int] = Field(
        None,
        description="Optional maximum number of tokens to generate in the analysis response.",
    )
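

# Illustrative job submission; the identifier and callback URL are
# hypothetical examples, not values used by the project:
#
#     job = DataImportAnalysisJobRequest(
#         import_record_id="imp-0001",
#         rows=[{"name": "Ada", "age": 36}],
#         headers=["name", "age"],
#         callback_url="https://example.com/hooks/import-analysis",
#         # llm_model falls back to DEFAULT_IMPORT_MODEL when omitted
#     )
#     body = job.model_dump_json()  # Pydantic v2; HttpUrl serialises to str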


class DataImportAnalysisJobAck(BaseModel):
    """Acknowledgement that an analysis job was accepted."""

    import_record_id: str = Field(..., description="Echo of the import record identifier.")
    status: str = Field("accepted", description="Processing status acknowledgement.")
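

if __name__ == "__main__":
    # Smoke check (illustrative only): round-trip an ack through JSON using
    # the Pydantic v2 API. Assumes the module is run from the package root.
    ack = DataImportAnalysisJobAck(import_record_id="imp-0001")
    assert DataImportAnalysisJobAck.model_validate_json(ack.model_dump_json()) == ack
    print(ack)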