# data-ge/app/models.py
from __future__ import annotations
from enum import Enum
from typing import Any, List, Optional
from pydantic import BaseModel, Field


class LLMRole(str, Enum):
    USER = "user"
    ASSISTANT = "assistant"
    SYSTEM = "system"


class LLMMessage(BaseModel):
    role: LLMRole = Field(..., description="Message author role.")
    content: str = Field(..., description="Plain text content of the message.")


class LLMProvider(str, Enum):
    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    OPENROUTER = "openrouter"
    GEMINI = "gemini"
    QWEN = "qwen"
    DEEPSEEK = "deepseek"


class LLMRequest(BaseModel):
    provider: LLMProvider = Field(..., description="Target LLM provider identifier.")
    model: str = Field(..., description="Model name understood by the provider.")
    messages: List[LLMMessage] = Field(..., description="Ordered chat messages.")
    temperature: Optional[float] = Field(
        0.7, description="Sampling temperature when supported."
    )
    top_p: Optional[float] = Field(
        None, description="Top-p nucleus sampling when supported."
    )
    max_tokens: Optional[int] = Field(
        None, description="Maximum tokens to generate when supported."
    )
    stream: Optional[bool] = Field(
        False, description="Enable provider streaming if both sides support it."
    )
    extra_params: Optional[dict[str, Any]] = Field(
        None, description="Provider-specific parameters to merge into the payload."
    )
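
# Illustrative note (an assumption about how adapters consume this model, not
# something this module enforces): values in extra_params, e.g.
# {"presence_penalty": 0.5}, ride along verbatim into the provider payload so
# callers can pass provider-specific knobs without adding new fields here.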


class LLMChoice(BaseModel):
    index: int
    message: LLMMessage


class LLMResponse(BaseModel):
    provider: LLMProvider
    model: str
    choices: List[LLMChoice]
    raw: Optional[dict[str, Any]] = Field(
        None, description="Raw provider response for debugging."
    )
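
# Illustrative access pattern (assumes the provider returned at least one choice):
#     text = response.choices[0].message.content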


class DataImportAnalysisRequest(BaseModel):
    import_record_id: str = Field(..., description="Unique identifier for this import run.")
    example_data: str = Field(
        ...,
        max_length=30_000,
        description="Sample rows from the import payload. Limited to 30k characters.",
    )
    table_headers: List[str] = Field(
        ...,
        min_length=1,
        description="Ordered list of table headers associated with the data.",
    )
    llm_model: str = Field(
        ...,
        description="Model identifier. Accepts 'provider:model' format or plain model name.",
    )
    temperature: Optional[float] = Field(
        None,
        description="Optional override for LLM temperature when generating recognition output.",
    )
    max_tokens: Optional[int] = Field(
        None,
        description="Optional override for maximum tokens generated during recognition.",
    )
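

# Sketch of how a caller might resolve the llm_model field (a hypothetical helper,
# not part of the original module; raising ValueError on an unknown provider
# prefix, via the LLMProvider constructor, is an assumption):
def split_llm_model(llm_model: str) -> tuple[Optional[LLMProvider], str]:
    """Split 'provider:model' into parts; a plain model name yields (None, name)."""
    if ":" in llm_model:
        provider_name, model_name = llm_model.split(":", 1)
        return LLMProvider(provider_name), model_name
    return None, llm_model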


class DataImportAnalysisResponse(BaseModel):
    import_record_id: str
    llm_response: LLMResponse
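

# Minimal usage sketch (illustrative only; the model name and message text are
# assumptions, and a real service would hand this request to a provider client):
if __name__ == "__main__":
    request = LLMRequest(
        provider=LLMProvider.OPENAI,
        model="gpt-4o-mini",  # assumed example model name
        messages=[
            LLMMessage(role=LLMRole.SYSTEM, content="You are a helpful assistant."),
            LLMMessage(role=LLMRole.USER, content="Hello!"),
        ],
        max_tokens=256,
    )
    # Pydantic v2 serialization; str-valued enums dump as their string values.
    print(request.model_dump_json(indent=2))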