Compare commits

8 commits: 7eb3c059a1 ... ebd79b75bd

| SHA1 |
|---|
| ebd79b75bd |
| 0bc26ef4a1 |
| 509dae3270 |
| f261121845 |
| eefaf91ed1 |
| abe3d479a4 |
| 368ffaaaae |
| a72ca3593e |
.env (4 changed lines)

@@ -17,7 +17,7 @@ DEFAULT_IMPORT_MODEL=deepseek:deepseek-chat
 IMPORT_GATEWAY_BASE_URL=http://localhost:8000

 # HTTP client configuration
-HTTP_CLIENT_TIMEOUT=60
+HTTP_CLIENT_TIMEOUT=120
 HTTP_CLIENT_TRUST_ENV=false
 # HTTP_CLIENT_PROXY=

@@ -27,3 +27,5 @@ IMPORT_CHAT_TIMEOUT_SECONDS=120
 # Logging
 LOG_LEVEL=INFO
 # LOG_FORMAT=%(asctime)s %(levelname)s %(name)s:%(lineno)d %(message)s
+NEW_API_BASE_URL=http://localhost:3000
+NEW_API_AUTH_TOKEN="sk-Q79KGFJRs5Vk9HsfFqoiJk948uLMDhAVe037AeCb31URyWGL"
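Note: the two new `NEW_API_*` keys are consumed via `app.settings` (the gateway diff below imports `NEW_API_BASE_URL` and `NEW_API_AUTH_TOKEN` from there). A minimal sketch of how the settings module presumably resolves them; the implementation itself is not part of this diff, only the names and the default base URL are confirmed:

```python
# Hypothetical resolution in app/settings.py (not shown in this diff).
import os

NEW_API_BASE_URL = os.getenv("NEW_API_BASE_URL", "http://localhost:3000")
NEW_API_AUTH_TOKEN = os.getenv("NEW_API_AUTH_TOKEN")  # None disables the Bearer header
```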
README.md

@@ -2,7 +2,7 @@

 This project exposes a FastAPI-based microservice that provides:

-- A unified chat completions gateway supporting multiple LLM providers (OpenAI, Anthropic, OpenRouter, Gemini, Qwen, DeepSeek, etc.)
+- A unified chat completions gateway that now forwards requests to the internal `new-api` service (default `http://localhost:3000`) while preserving the same client-facing schema.
 - An asynchronous data import analysis pipeline that orchestrates LLM calls to produce structured metadata and processing recommendations

 The following instructions cover environment setup, dependency installation, and running the backend service.
@@ -56,6 +56,7 @@ Copy `.env.example` to `.env` (if provided) or edit `.env` to supply API keys an
 - `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `OPENROUTER_API_KEY`, etc.
 - `HTTP_CLIENT_TIMEOUT`, `IMPORT_CHAT_TIMEOUT_SECONDS`
 - `LOG_LEVEL`, `LOG_FORMAT` for logging
+- `NEW_API_BASE_URL` (defaults to `http://localhost:3000`) and optional `NEW_API_AUTH_TOKEN` if the new-api component enforces authentication.


 ## Run the Backend Service
@@ -84,4 +85,4 @@ Or use a process manager such as `pm2`, `supervisor`, or systemd for production

 - Run the data import analysis example: `python test/data_import_analysis_example.py`
 - Test the OpenRouter demo: `python test/openrouter_chat_example.py`
-- Send a DeepSeek chat request script: `python scripts/deepseek_request.py`
+- Send a DeepSeek chat request script: `python scripts/deepseek_request.py`
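Note: because the gateway keeps the client-facing schema while forwarding to new-api, existing clients keep posting to `/v1/chat/completions` on the service itself. A hedged example of such a call; the `provider`/`model`/`messages` keys mirror the payload fields visible in the import-pipeline diff below, everything else is illustrative:

```python
import httpx

resp = httpx.post(
    "http://localhost:8000/v1/chat/completions",  # IMPORT_GATEWAY_BASE_URL default
    json={
        "provider": "deepseek",
        "model": "deepseek-chat",
        "messages": [{"role": "user", "content": "ping"}],
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```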
@@ -254,6 +254,18 @@ class TableSnippetUpsertRequest(BaseModel):
     callback_url: HttpUrl = Field(..., description="Callback URL associated with the action run.")
     table_schema_version_id: int = Field(..., ge=0, description="Identifier for the schema snapshot.")
     table_schema: Any = Field(..., description="Schema snapshot payload for the table.")
+    model: Optional[str] = Field(
+        None,
+        description="LLM model identifier (can be provider alias) used for this action, when applicable.",
+    )
+    model_provider: Optional[str] = Field(
+        None,
+        description="LLM provider responsible for executing the action's model.",
+    )
+    model_params: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Optional model parameter overrides (e.g., temperature) associated with the action.",
+    )
     llm_usage: Optional[Any] = Field(
         None,
         description="Optional token usage metrics reported by the LLM provider.",
app/routers/__init__.py (new file, 4 lines)

@@ -0,0 +1,4 @@
from .chat import router as chat_router
from .metrics import router as metrics_router

__all__ = ["chat_router", "metrics_router"]
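Note: the package only re-exports the two routers; wiring them into the application would look roughly like this (the `app/main.py` location and the `include_router` calls are assumptions, not shown in this diff):

```python
from fastapi import FastAPI

from app.routers import chat_router, metrics_router

app = FastAPI()
app.include_router(chat_router)     # mounts /api/v1/chat/...
app.include_router(metrics_router)  # mounts /api/v1/...
```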
app/routers/chat.py (new file, 102 lines)

@@ -0,0 +1,102 @@
from __future__ import annotations

from datetime import datetime
from typing import Any, List, Optional

from fastapi import APIRouter, HTTPException, Query

from app.schemas.chat import (
    ChatSessionCreate,
    ChatSessionUpdate,
    ChatTurnCreate,
    ChatTurnRetrievalBatch,
)
from app.services import metric_store


router = APIRouter(prefix="/api/v1/chat", tags=["chat"])


@router.post("/sessions")
def create_session(payload: ChatSessionCreate) -> Any:
    """Create a chat session."""
    return metric_store.create_chat_session(payload)


@router.post("/sessions/{session_id}/update")
def update_session(session_id: int, payload: ChatSessionUpdate) -> Any:
    try:
        return metric_store.update_chat_session(session_id, payload)
    except KeyError:
        raise HTTPException(status_code=404, detail="Session not found")


@router.post("/sessions/{session_id}/close")
def close_session(session_id: int) -> Any:
    """Close a chat session and stamp end_time."""
    try:
        return metric_store.close_chat_session(session_id)
    except KeyError:
        raise HTTPException(status_code=404, detail="Session not found")


@router.get("/sessions/{session_id}")
def get_session(session_id: int) -> Any:
    """Fetch one session."""
    session = metric_store.get_chat_session(session_id)
    if not session:
        raise HTTPException(status_code=404, detail="Session not found")
    return session


@router.get("/sessions")
def list_sessions(
    user_id: Optional[int] = None,
    status: Optional[str] = None,
    start_from: Optional[datetime] = Query(None, description="Filter by start time lower bound."),
    start_to: Optional[datetime] = Query(None, description="Filter by start time upper bound."),
    limit: int = Query(50, ge=1, le=500),
    offset: int = Query(0, ge=0),
) -> List[Any]:
    return metric_store.list_chat_sessions(
        user_id=user_id,
        status=status,
        start_from=start_from,
        start_to=start_to,
        limit=limit,
        offset=offset,
    )


@router.post("/sessions/{session_id}/turns")
def create_turn(session_id: int, payload: ChatTurnCreate) -> Any:
    """Create a turn under a session."""
    try:
        return metric_store.create_chat_turn(session_id, payload)
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@router.get("/sessions/{session_id}/turns")
def list_turns(session_id: int) -> List[Any]:
    return metric_store.list_chat_turns(session_id)


@router.get("/turns/{turn_id}")
def get_turn(turn_id: int) -> Any:
    turn = metric_store.get_chat_turn(turn_id)
    if not turn:
        raise HTTPException(status_code=404, detail="Turn not found")
    return turn


@router.post("/turns/{turn_id}/retrievals")
def write_retrievals(turn_id: int, payload: ChatTurnRetrievalBatch) -> Any:
    """Batch write retrieval records for a turn."""
    count = metric_store.create_retrievals(turn_id, payload.retrievals)
    return {"turn_id": turn_id, "inserted": count}


@router.get("/turns/{turn_id}/retrievals")
def list_retrievals(turn_id: int) -> List[Any]:
    return metric_store.list_retrievals(turn_id)
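Note: a sketch of a full session lifecycle against these endpoints. The host/port is an assumption, request bodies follow the `ChatSessionCreate`/`ChatTurnCreate`/`ChatTurnRetrievalBatch` schemas added later in this diff, and the `id` key in responses is assumed from the underlying `SELECT *` rows:

```python
import httpx

base = "http://localhost:8000/api/v1/chat"
session = httpx.post(f"{base}/sessions", json={"user_id": 1}).json()
turn = httpx.post(
    f"{base}/sessions/{session['id']}/turns",
    json={"user_id": 1, "user_query": "daily active users last week?"},
).json()
httpx.post(
    f"{base}/turns/{turn['id']}/retrievals",
    json={"retrievals": [{"item_type": "METRIC", "item_id": "42", "rank_no": 1}]},
)
httpx.post(f"{base}/sessions/{session['id']}/close")
```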
app/routers/metrics.py (new file, 166 lines)

@@ -0,0 +1,166 @@
from __future__ import annotations

from datetime import datetime
from typing import Any, List, Optional

from fastapi import APIRouter, HTTPException, Query

from app.schemas.metrics import (
    MetricCreate,
    MetricResultsWriteRequest,
    MetricRunTrigger,
    MetricScheduleCreate,
    MetricScheduleUpdate,
    MetricUpdate,
)
from app.services import metric_store


router = APIRouter(prefix="/api/v1", tags=["metrics"])


@router.post("/metrics")
def create_metric(payload: MetricCreate) -> Any:
    """Create a metric definition."""
    try:
        return metric_store.create_metric(payload)
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@router.post("/metrics/{metric_id}")
def update_metric(metric_id: int, payload: MetricUpdate) -> Any:
    """Update fields of a metric definition."""
    try:
        return metric_store.update_metric(metric_id, payload)
    except KeyError:
        raise HTTPException(status_code=404, detail="Metric not found")
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@router.get("/metrics/{metric_id}")
def get_metric(metric_id: int) -> Any:
    """Fetch a metric definition by id."""
    metric = metric_store.get_metric(metric_id)
    if not metric:
        raise HTTPException(status_code=404, detail="Metric not found")
    return metric


@router.get("/metrics")
def list_metrics(
    biz_domain: Optional[str] = None,
    is_active: Optional[bool] = None,
    keyword: Optional[str] = Query(None, description="Search by code/name"),
    limit: int = Query(100, ge=1, le=500),
    offset: int = Query(0, ge=0),
) -> List[Any]:
    """List metrics with optional filters."""
    return metric_store.list_metrics(
        biz_domain=biz_domain,
        is_active=is_active,
        keyword=keyword,
        limit=limit,
        offset=offset,
    )


@router.post("/metric-schedules")
def create_schedule(payload: MetricScheduleCreate) -> Any:
    """Create a metric schedule."""
    try:
        return metric_store.create_metric_schedule(payload)
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@router.post("/metric-schedules/{schedule_id}")
def update_schedule(schedule_id: int, payload: MetricScheduleUpdate) -> Any:
    """Update a metric schedule."""
    try:
        return metric_store.update_metric_schedule(schedule_id, payload)
    except KeyError:
        raise HTTPException(status_code=404, detail="Schedule not found")
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@router.get("/metrics/{metric_id}/schedules")
def list_schedules(metric_id: int) -> List[Any]:
    """List schedules for one metric."""
    return metric_store.list_schedules_for_metric(metric_id=metric_id)


@router.post("/metric-runs/trigger")
def trigger_run(payload: MetricRunTrigger) -> Any:
    """Insert a run record (execution handled externally)."""
    try:
        return metric_store.trigger_metric_run(payload)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@router.get("/metric-runs")
def list_runs(
    metric_id: Optional[int] = None,
    status: Optional[str] = None,
    limit: int = Query(100, ge=1, le=500),
    offset: int = Query(0, ge=0),
) -> List[Any]:
    """List run records."""
    return metric_store.list_metric_runs(
        metric_id=metric_id, status=status, limit=limit, offset=offset
    )


@router.get("/metric-runs/{run_id}")
def get_run(run_id: int) -> Any:
    """Fetch run details."""
    run = metric_store.get_metric_run(run_id)
    if not run:
        raise HTTPException(status_code=404, detail="Run not found")
    return run


@router.post("/metric-results/{metric_id}")
def write_results(metric_id: int, payload: MetricResultsWriteRequest) -> Any:
    # Align path metric_id with payload to avoid mismatch.
    if payload.metric_id != metric_id:
        raise HTTPException(status_code=400, detail="metric_id in path/body mismatch")
    try:
        inserted = metric_store.write_metric_results(payload)
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    return {"metric_id": metric_id, "inserted": inserted}


@router.get("/metric-results")
def query_results(
    metric_id: int,
    stat_from: Optional[datetime] = None,
    stat_to: Optional[datetime] = None,
    limit: int = Query(200, ge=1, le=1000),
    offset: int = Query(0, ge=0),
) -> List[Any]:
    """Query metric results by time range."""
    return metric_store.query_metric_results(
        metric_id=metric_id,
        stat_from=stat_from,
        stat_to=stat_to,
        limit=limit,
        offset=offset,
    )


@router.get("/metric-results/latest")
def latest_result(metric_id: int) -> Any:
    """Fetch the latest metric result."""
    result = metric_store.latest_metric_result(metric_id)
    if not result:
        raise HTTPException(status_code=404, detail="Metric result not found")
    return result
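Note: the metric endpoints compose the same way; an illustrative lifecycle sketch (field values are made up, the host/port and the returned `id` key are assumptions):

```python
import httpx

api = "http://localhost:8000/api/v1"
metric = httpx.post(f"{api}/metrics", json={
    "metric_code": "dau",
    "metric_name": "Daily Active Users",
    "biz_domain": "growth",
    "base_sql": "SELECT COUNT(DISTINCT user_id) FROM events WHERE dt = :dt",
    "time_grain": "DAY",
    "dim_binding": [],
    "update_strategy": "FULL",
}).json()
run = httpx.post(f"{api}/metric-runs/trigger", json={"metric_id": metric["id"]}).json()
latest = httpx.get(f"{api}/metric-results/latest", params={"metric_id": metric["id"]})
```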
app/schemas/chat.py (new file, 53 lines)

@@ -0,0 +1,53 @@
from __future__ import annotations

from datetime import datetime
from typing import Any, List, Optional

from pydantic import BaseModel, Field


class ChatSessionCreate(BaseModel):
    """Create a chat session to group multiple turns for a user."""
    user_id: int = Field(..., description="User ID owning the session.")
    session_uuid: Optional[str] = Field(None, description="Optional externally provided UUID.")
    status: Optional[str] = Field("OPEN", description="Session status, default OPEN.")
    end_time: Optional[datetime] = Field(None, description="Optional end time.")
    ext_context: Optional[dict[str, Any]] = Field(None, description="Arbitrary business context.")


class ChatSessionUpdate(BaseModel):
    """Partial update for a chat session."""
    status: Optional[str] = Field(None, description="New session status.")
    end_time: Optional[datetime] = Field(None, description="Close time override.")
    last_turn_id: Optional[int] = Field(None, description="Pointer to last chat turn.")
    ext_context: Optional[dict[str, Any]] = Field(None, description="Context patch.")


class ChatTurnCreate(BaseModel):
    """Create a single chat turn with intent/SQL context."""
    user_id: int = Field(..., description="User ID for this turn.")
    user_query: str = Field(..., description="Raw user query content.")
    intent: Optional[str] = Field(None, description="Intent tag such as METRIC_QUERY.")
    ast_json: Optional[dict[str, Any]] = Field(None, description="Parsed AST payload.")
    generated_sql: Optional[str] = Field(None, description="Final SQL text, if generated.")
    sql_status: Optional[str] = Field(None, description="SQL generation/execution status.")
    error_msg: Optional[str] = Field(None, description="Error message when SQL failed.")
    main_metric_ids: Optional[List[int]] = Field(None, description="Metric IDs referenced in this turn.")
    created_metric_ids: Optional[List[int]] = Field(None, description="Metric IDs created in this turn.")
    end_time: Optional[datetime] = Field(None, description="Turn end time.")


class ChatTurnRetrievalItem(BaseModel):
    """Record of one retrieved item contributing to a turn."""
    item_type: str = Field(..., description="METRIC/SNIPPET/CHAT etc.")
    item_id: str = Field(..., description="Identifier such as metric_id or snippet_id.")
    item_extra: Optional[dict[str, Any]] = Field(None, description="Additional context like column name.")
    similarity_score: Optional[float] = Field(None, description="Similarity score.")
    rank_no: Optional[int] = Field(None, description="Ranking position.")
    used_in_reasoning: Optional[bool] = Field(False, description="Flag if used in reasoning.")
    used_in_sql: Optional[bool] = Field(False, description="Flag if used in final SQL.")


class ChatTurnRetrievalBatch(BaseModel):
    """Batch insert wrapper for retrieval records."""
    retrievals: List[ChatTurnRetrievalItem]
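Note: these are Pydantic v2 models (the gateway below relies on `model_dump(mode="json")`), so batch payloads validate directly; a small sketch:

```python
from app.schemas.chat import ChatTurnRetrievalBatch

batch = ChatTurnRetrievalBatch.model_validate(
    {"retrievals": [{"item_type": "METRIC", "item_id": "42", "rank_no": 1}]}
)
assert batch.retrievals[0].used_in_sql is False  # defaults apply
```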
app/schemas/metrics.py (new file, 99 lines)

@@ -0,0 +1,99 @@
from __future__ import annotations

from datetime import datetime
from typing import Any, List, Optional

from pydantic import BaseModel, Field


class MetricCreate(BaseModel):
    """Create a metric definition with business and technical metadata."""
    metric_code: str = Field(..., description="Internal metric code, unique.")
    metric_name: str = Field(..., description="Display name.")
    metric_aliases: Optional[List[str]] = Field(None, description="Optional alias list.")
    biz_domain: str = Field(..., description="Business domain identifier.")
    biz_desc: Optional[str] = Field(None, description="Business definition.")
    chat_turn_id: Optional[int] = Field(None, description="Source chat turn ID.")
    tech_desc: Optional[str] = Field(None, description="Technical definition.")
    formula_expr: Optional[str] = Field(None, description="Formula expression text.")
    base_sql: str = Field(..., description="Canonical SQL used to compute the metric.")
    time_grain: str = Field(..., description="DAY/HOUR/WEEK/MONTH etc.")
    dim_binding: List[str] = Field(..., description="Dimension columns bound to the metric.")
    update_strategy: str = Field(..., description="FULL/INCR/REALTIME.")
    schedule_id: Optional[int] = Field(None, description="Linked schedule id if any.")
    schedule_type: Optional[int] = Field(None, description="Scheduler type identifier.")
    is_active: bool = Field(True, description="Whether the metric is enabled.")
    created_by: Optional[int] = Field(None, description="Creator user id.")
    updated_by: Optional[int] = Field(None, description="Updater user id.")


class MetricUpdate(BaseModel):
    """Partial update for an existing metric definition."""
    metric_name: Optional[str] = None
    metric_aliases: Optional[List[str]] = None
    biz_domain: Optional[str] = None
    biz_desc: Optional[str] = None
    tech_desc: Optional[str] = None
    formula_expr: Optional[str] = None
    base_sql: Optional[str] = None
    time_grain: Optional[str] = None
    dim_binding: Optional[List[str]] = None
    update_strategy: Optional[str] = None
    schedule_id: Optional[int] = None
    schedule_type: Optional[int] = None
    is_active: Optional[bool] = None
    updated_by: Optional[int] = None


class MetricScheduleCreate(BaseModel):
    """Create a cron-based schedule for a metric."""
    metric_id: int
    cron_expr: str
    enabled: bool = True
    priority: int = 10
    backfill_allowed: bool = True
    max_runtime_sec: Optional[int] = None
    retry_times: int = 0
    owner_team: Optional[str] = None
    owner_user_id: Optional[int] = None


class MetricScheduleUpdate(BaseModel):
    """Update fields of an existing metric schedule."""
    cron_expr: Optional[str] = None
    enabled: Optional[bool] = None
    priority: Optional[int] = None
    backfill_allowed: Optional[bool] = None
    max_runtime_sec: Optional[int] = None
    retry_times: Optional[int] = None
    owner_team: Optional[str] = None
    owner_user_id: Optional[int] = None


class MetricRunTrigger(BaseModel):
    """Trigger a metric run, optionally linking to a chat turn or schedule."""
    metric_id: int
    schedule_id: Optional[int] = None
    source_turn_id: Optional[int] = None
    data_time_from: Optional[datetime] = None
    data_time_to: Optional[datetime] = None
    metric_version: Optional[int] = None
    base_sql_snapshot: Optional[str] = None
    triggered_by: str = Field("API", description="SCHEDULER/MANUAL/API/QA_TURN")
    triggered_at: Optional[datetime] = None


class MetricResultItem(BaseModel):
    """Single metric result row to be persisted."""
    stat_time: datetime
    metric_value: float
    metric_version: Optional[int] = None
    extra_dims: Optional[dict[str, Any]] = None
    load_time: Optional[datetime] = None
    data_version: Optional[int] = None


class MetricResultsWriteRequest(BaseModel):
    """Batch write request for metric results."""
    metric_id: int
    results: List[MetricResultItem]
app/services/__init__.py

@@ -1,3 +1,4 @@
 from .gateway import LLMGateway
+from .rag_client import RagAPIClient

-__all__ = ["LLMGateway"]
+__all__ = ["LLMGateway", "RagAPIClient"]
app/services/gateway.py

@@ -1,53 +1,93 @@
 from __future__ import annotations

-import os
-from typing import Dict, Type
+import logging

 import httpx
+from pydantic import ValidationError

-from app.exceptions import ProviderConfigurationError
-from app.models import LLMProvider, LLMRequest, LLMResponse
-from app.providers import (
-    AnthropicProvider,
-    DeepSeekProvider,
-    GeminiProvider,
-    LLMProviderClient,
-    OpenAIProvider,
-    OpenRouterProvider,
-    QwenProvider,
-)
+from app.exceptions import ProviderAPICallError
+from app.models import LLMChoice, LLMMessage, LLMRequest, LLMResponse
+from app.settings import NEW_API_AUTH_TOKEN, NEW_API_BASE_URL
+
+
+logger = logging.getLogger(__name__)


 class LLMGateway:
-    """Simple registry that dispatches chat requests to provider clients."""
+    """Forward chat requests to the configured new-api component."""

-    def __init__(self) -> None:
-        self._providers: Dict[LLMProvider, LLMProviderClient] = {}
-        self._factory: Dict[LLMProvider, Type[LLMProviderClient]] = {
-            LLMProvider.OPENAI: OpenAIProvider,
-            LLMProvider.ANTHROPIC: AnthropicProvider,
-            LLMProvider.OPENROUTER: OpenRouterProvider,
-            LLMProvider.GEMINI: GeminiProvider,
-            LLMProvider.QWEN: QwenProvider,
-            LLMProvider.DEEPSEEK: DeepSeekProvider,
-        }
-
-    def get_provider(self, provider: LLMProvider) -> LLMProviderClient:
-        if provider not in self._factory:
-            raise ProviderConfigurationError(f"Unsupported provider '{provider.value}'.")
-
-        if provider not in self._providers:
-            self._providers[provider] = self._build_provider(provider)
-        return self._providers[provider]
-
-    def _build_provider(self, provider: LLMProvider) -> LLMProviderClient:
-        provider_cls = self._factory[provider]
-        api_key_env = getattr(provider_cls, "api_key_env", None)
-        api_key = os.getenv(api_key_env) if api_key_env else None
-        return provider_cls(api_key)
+    def __init__(
+        self,
+        *,
+        base_url: str | None = None,
+        auth_token: str | None = None,
+    ) -> None:
+        resolved_base = base_url or NEW_API_BASE_URL
+        self._base_url = resolved_base.rstrip("/")
+        self._auth_token = auth_token or NEW_API_AUTH_TOKEN

     async def chat(
         self, request: LLMRequest, client: httpx.AsyncClient
     ) -> LLMResponse:
-        provider_client = self.get_provider(request.provider)
-        return await provider_client.chat(request, client)
+        url = f"{self._base_url}/v1/chat/completions"
+        payload = request.model_dump(mode="json", exclude_none=True)
+        headers = {"Content-Type": "application/json"}
+        if self._auth_token:
+            headers["Authorization"] = f"Bearer {self._auth_token}"
+        logger.info("Forwarding chat request to new-api at %s", url)
+        try:
+            response = await client.post(url, json=payload, headers=headers)
+            response.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            status_code = exc.response.status_code if exc.response else None
+            response_text = exc.response.text if exc.response else ""
+            logger.error(
+                "new-api upstream returned %s: %s",
+                status_code,
+                response_text,
+                exc_info=True,
+            )
+            raise ProviderAPICallError(
+                "Chat completion request failed.",
+                status_code=status_code,
+                response_text=response_text,
+            ) from exc
+        except httpx.HTTPError as exc:
+            logger.error("new-api transport error: %s", exc, exc_info=True)
+            raise ProviderAPICallError(f"Chat completion request failed: {exc}") from exc
+
+        try:
+            data = response.json()
+        except ValueError as exc:
+            logger.error("new-api responded with invalid JSON.", exc_info=True)
+            raise ProviderAPICallError(
+                "Chat completion response was not valid JSON."
+            ) from exc
+
+        logger.info("new-api payload: %s", data)
+        normalized_choices: list[LLMChoice] = []
+        for idx, choice in enumerate(data.get("choices", []) or []):
+            message_payload = choice.get("message") or {}
+            message = LLMMessage(
+                role=message_payload.get("role", "assistant"),
+                content=message_payload.get("content", ""),
+            )
+            normalized_choices.append(
+                LLMChoice(index=choice.get("index", idx), message=message)
+            )
+
+        try:
+            normalized_response = LLMResponse(
+                provider=request.provider,
+                model=data.get("model", request.model),
+                choices=normalized_choices,
+                raw=data,
+            )
+            return normalized_response
+        except ValidationError as exc:
+            logger.error(
+                "new-api response did not match expected schema: %s", data, exc_info=True
+            )
+            raise ProviderAPICallError(
+                "Chat completion response was not in the expected format."
+            ) from exc
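Note: a sketch of driving the rewritten gateway. The `provider`/`model` fields of `LLMRequest` and the `choices[].message.content` shape are visible in this diff; the `messages` constructor argument is an assumption:

```python
import asyncio

import httpx

from app.models import LLMProvider, LLMRequest
from app.services import LLMGateway


async def main() -> None:
    gateway = LLMGateway()  # falls back to NEW_API_BASE_URL / NEW_API_AUTH_TOKEN
    async with httpx.AsyncClient() as client:
        request = LLMRequest(
            provider=LLMProvider.DEEPSEEK,
            model="deepseek-chat",
            messages=[{"role": "user", "content": "ping"}],  # assumed field
        )
        response = await gateway.chat(request, client)
        print(response.choices[0].message.content)


asyncio.run(main())
```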
app/services/import_analysis.py

@@ -22,14 +22,24 @@ from app.models import (
     LLMResponse,
     LLMRole,
 )
-from app.settings import DEFAULT_IMPORT_MODEL, get_supported_import_models
+from app.settings import (
+    DEFAULT_IMPORT_MODEL,
+    NEW_API_AUTH_TOKEN,
+    NEW_API_BASE_URL,
+    get_supported_import_models,
+)
 from app.utils.llm_usage import extract_usage

 logger = logging.getLogger(__name__)

-IMPORT_GATEWAY_BASE_URL = os.getenv(
-    "IMPORT_GATEWAY_BASE_URL", "http://localhost:8000"
-)
+IMPORT_GATEWAY_BASE_URL = os.getenv("IMPORT_GATEWAY_BASE_URL", NEW_API_BASE_URL)
+
+
+def build_import_gateway_headers() -> dict[str, str]:
+    headers = {"Content-Type": "application/json"}
+    if NEW_API_AUTH_TOKEN:
+        headers["Authorization"] = f"Bearer {NEW_API_AUTH_TOKEN}"
+    return headers


 def _env_float(name: str, default: float) -> float:
@@ -314,16 +324,18 @@ async def dispatch_import_analysis_job(
     url = f"{IMPORT_GATEWAY_BASE_URL.rstrip('/')}/v1/chat/completions"

     logger.info(
-        "Dispatching import %s to %s: %s",
+        "Dispatching import %s to %s using provider=%s model=%s",
         request.import_record_id,
         url,
-        json.dumps(payload, ensure_ascii=False),
+        payload.get("provider"),
+        payload.get("model"),
     )

     timeout = httpx.Timeout(IMPORT_CHAT_TIMEOUT_SECONDS)
+    headers = build_import_gateway_headers()

     try:
-        response = await client.post(url, json=payload, timeout=timeout)
+        response = await client.post(url, json=payload, timeout=timeout, headers=headers)
         response.raise_for_status()
     except httpx.HTTPStatusError as exc:
         body_preview = ""
@@ -348,9 +360,10 @@ async def dispatch_import_analysis_job(
         response.status_code,
     )
     logger.info(
-        "LLM response for %s: %s",
+        "LLM response received for %s (status %s, choices=%s)",
         request.import_record_id,
-        json.dumps(response_data, ensure_ascii=False),
+        response.status_code,
+        len(response_data.get("choices") or []),
     )

     try:
@@ -404,6 +417,7 @@ async def process_import_analysis_job(
     request: DataImportAnalysisJobRequest,
     client: httpx.AsyncClient,
 ) -> None:
+    # Run the import analysis and ensure the callback fires regardless of success/failure.
     try:
         payload = await dispatch_import_analysis_job(request, client)
     except ProviderAPICallError as exc:
842
app/services/metric_store.py
Normal file
842
app/services/metric_store.py
Normal file
@ -0,0 +1,842 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.engine import Row
|
||||
|
||||
from app.db import get_engine
|
||||
from app.schemas.chat import (
|
||||
ChatSessionCreate,
|
||||
ChatSessionUpdate,
|
||||
ChatTurnCreate,
|
||||
ChatTurnRetrievalItem,
|
||||
)
|
||||
from app.schemas.metrics import (
|
||||
MetricCreate,
|
||||
MetricResultItem,
|
||||
MetricResultsWriteRequest,
|
||||
MetricRunTrigger,
|
||||
MetricScheduleCreate,
|
||||
MetricScheduleUpdate,
|
||||
MetricUpdate,
|
||||
)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Common helpers
|
||||
def _json_dump(value: Any) -> Optional[str]:
|
||||
"""Safe JSON dumper; returns None on failure to keep DB writes simple."""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
try:
|
||||
return json.dumps(value, ensure_ascii=False)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _parse_json_fields(payload: Dict[str, Any], fields: Iterable[str]) -> Dict[str, Any]:
|
||||
"""Parse select fields from JSON strings into dict/list for responses."""
|
||||
for field in fields:
|
||||
raw = payload.get(field)
|
||||
if raw is None or isinstance(raw, (dict, list)):
|
||||
continue
|
||||
if isinstance(raw, (bytes, bytearray)):
|
||||
raw = raw.decode("utf-8", errors="ignore")
|
||||
if isinstance(raw, str):
|
||||
try:
|
||||
payload[field] = json.loads(raw)
|
||||
except ValueError:
|
||||
pass
|
||||
return payload
|
||||
|
||||
|
||||
def _row_to_dict(row: Row[Any]) -> Dict[str, Any]:
|
||||
return dict(row._mapping)
|
||||
|
||||
|
||||
# Chat sessions & turns
|
||||
def create_chat_session(payload: ChatSessionCreate) -> Dict[str, Any]:
|
||||
"""Create a chat session row with optional external UUID."""
|
||||
engine = get_engine()
|
||||
session_uuid = payload.session_uuid or str(uuid4())
|
||||
now = datetime.utcnow()
|
||||
params = {
|
||||
"user_id": payload.user_id,
|
||||
"session_uuid": session_uuid,
|
||||
"end_time": payload.end_time,
|
||||
"status": payload.status or "OPEN",
|
||||
"ext_context": _json_dump(payload.ext_context),
|
||||
}
|
||||
with engine.begin() as conn:
|
||||
result = conn.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO chat_session (user_id, session_uuid, end_time, status, ext_context)
|
||||
VALUES (:user_id, :session_uuid, :end_time, :status, :ext_context)
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
session_id = result.lastrowid
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM chat_session WHERE id=:id"), {"id": session_id}
|
||||
).first()
|
||||
if not row:
|
||||
raise RuntimeError("Failed to create chat session.")
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["ext_context"])
|
||||
return data
|
||||
|
||||
|
||||
def update_chat_session(session_id: int, payload: ChatSessionUpdate) -> Dict[str, Any]:
|
||||
"""Patch selected chat session fields."""
|
||||
updates = {}
|
||||
if payload.status is not None:
|
||||
updates["status"] = payload.status
|
||||
if payload.end_time is not None:
|
||||
updates["end_time"] = payload.end_time
|
||||
if payload.last_turn_id is not None:
|
||||
updates["last_turn_id"] = payload.last_turn_id
|
||||
if payload.ext_context is not None:
|
||||
updates["ext_context"] = _json_dump(payload.ext_context)
|
||||
|
||||
if not updates:
|
||||
current = get_chat_session(session_id)
|
||||
if not current:
|
||||
raise KeyError(f"Session {session_id} not found.")
|
||||
return current
|
||||
|
||||
set_clause = ", ".join(f"{key}=:{key}" for key in updates.keys())
|
||||
params = dict(updates)
|
||||
params["id"] = session_id
|
||||
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
conn.execute(
|
||||
text(f"UPDATE chat_session SET {set_clause} WHERE id=:id"),
|
||||
params,
|
||||
)
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM chat_session WHERE id=:id"), {"id": session_id}
|
||||
).first()
|
||||
if not row:
|
||||
raise KeyError(f"Session {session_id} not found.")
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["ext_context"])
|
||||
return data
|
||||
|
||||
|
||||
def close_chat_session(session_id: int) -> Dict[str, Any]:
|
||||
"""Mark a chat session as CLOSED with end_time."""
|
||||
now = datetime.utcnow()
|
||||
return update_chat_session(
|
||||
session_id,
|
||||
ChatSessionUpdate(status="CLOSED", end_time=now),
|
||||
)
|
||||
|
||||
|
||||
def get_chat_session(session_id: int) -> Optional[Dict[str, Any]]:
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM chat_session WHERE id=:id"), {"id": session_id}
|
||||
).first()
|
||||
if not row:
|
||||
return None
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["ext_context"])
|
||||
return data
|
||||
|
||||
|
||||
def list_chat_sessions(
|
||||
*,
|
||||
user_id: Optional[int] = None,
|
||||
status: Optional[str] = None,
|
||||
start_from: Optional[datetime] = None,
|
||||
start_to: Optional[datetime] = None,
|
||||
limit: int = 50,
|
||||
offset: int = 0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""List chat sessions with optional filters and pagination."""
|
||||
conditions = []
|
||||
params: Dict[str, Any] = {"limit": limit, "offset": offset}
|
||||
if user_id is not None:
|
||||
conditions.append("user_id=:user_id")
|
||||
params["user_id"] = user_id
|
||||
if status is not None:
|
||||
conditions.append("status=:status")
|
||||
params["status"] = status
|
||||
if start_from is not None:
|
||||
conditions.append("created_at>=:start_from")
|
||||
params["start_from"] = start_from
|
||||
if start_to is not None:
|
||||
conditions.append("created_at<=:start_to")
|
||||
params["start_to"] = start_to
|
||||
|
||||
where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
rows = conn.execute(
|
||||
text(
|
||||
f"SELECT * FROM chat_session {where_clause} "
|
||||
"ORDER BY created_at DESC LIMIT :limit OFFSET :offset"
|
||||
),
|
||||
params,
|
||||
).fetchall()
|
||||
results: List[Dict[str, Any]] = []
|
||||
for row in rows:
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["ext_context"])
|
||||
results.append(data)
|
||||
return results
|
||||
|
||||
|
||||
def _next_turn_no(conn, session_id: int) -> int:
|
||||
row = conn.execute(
|
||||
text("SELECT COALESCE(MAX(turn_no), 0) + 1 AS next_no FROM chat_turn WHERE session_id=:sid"),
|
||||
{"sid": session_id},
|
||||
).first()
|
||||
if not row:
|
||||
return 1
|
||||
return int(row._mapping["next_no"])
|
||||
|
||||
|
||||
def create_chat_turn(session_id: int, payload: ChatTurnCreate) -> Dict[str, Any]:
|
||||
"""Insert a chat turn and auto-increment turn number within the session."""
|
||||
engine = get_engine()
|
||||
now = datetime.utcnow()
|
||||
params = {
|
||||
"session_id": session_id,
|
||||
"user_id": payload.user_id,
|
||||
"user_query": payload.user_query,
|
||||
"intent": payload.intent,
|
||||
"ast_json": _json_dump(payload.ast_json),
|
||||
"generated_sql": payload.generated_sql,
|
||||
"sql_status": payload.sql_status,
|
||||
"error_msg": payload.error_msg,
|
||||
"main_metric_ids": _json_dump(payload.main_metric_ids),
|
||||
"created_metric_ids": _json_dump(payload.created_metric_ids),
|
||||
"end_time": payload.end_time,
|
||||
}
|
||||
with engine.begin() as conn:
|
||||
turn_no = _next_turn_no(conn, session_id)
|
||||
params["turn_no"] = turn_no
|
||||
result = conn.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO chat_turn (
|
||||
session_id, turn_no, user_id,
|
||||
user_query, intent, ast_json,
|
||||
generated_sql, sql_status, error_msg,
|
||||
main_metric_ids, created_metric_ids,
|
||||
end_time
|
||||
)
|
||||
VALUES (
|
||||
:session_id, :turn_no, :user_id,
|
||||
:user_query, :intent, :ast_json,
|
||||
:generated_sql, :sql_status, :error_msg,
|
||||
:main_metric_ids, :created_metric_ids,
|
||||
:end_time
|
||||
)
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
turn_id = result.lastrowid
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM chat_turn WHERE id=:id"), {"id": turn_id}
|
||||
).first()
|
||||
if not row:
|
||||
raise RuntimeError("Failed to create chat turn.")
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["ast_json", "main_metric_ids", "created_metric_ids"])
|
||||
return data
|
||||
|
||||
|
||||
def get_chat_turn(turn_id: int) -> Optional[Dict[str, Any]]:
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM chat_turn WHERE id=:id"), {"id": turn_id}
|
||||
).first()
|
||||
if not row:
|
||||
return None
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["ast_json", "main_metric_ids", "created_metric_ids"])
|
||||
return data
|
||||
|
||||
|
||||
def list_chat_turns(session_id: int) -> List[Dict[str, Any]]:
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
rows = conn.execute(
|
||||
text(
|
||||
"SELECT * FROM chat_turn WHERE session_id=:session_id ORDER BY turn_no ASC"
|
||||
),
|
||||
{"session_id": session_id},
|
||||
).fetchall()
|
||||
results: List[Dict[str, Any]] = []
|
||||
for row in rows:
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["ast_json", "main_metric_ids", "created_metric_ids"])
|
||||
results.append(data)
|
||||
return results
|
||||
|
||||
|
||||
def create_retrievals(turn_id: int, retrievals: List[ChatTurnRetrievalItem]) -> int:
|
||||
"""Batch insert retrieval records for a turn."""
|
||||
if not retrievals:
|
||||
return 0
|
||||
engine = get_engine()
|
||||
params_list = []
|
||||
for item in retrievals:
|
||||
params_list.append(
|
||||
{
|
||||
"turn_id": turn_id,
|
||||
"item_type": item.item_type,
|
||||
"item_id": item.item_id,
|
||||
"item_extra": _json_dump(item.item_extra),
|
||||
"similarity_score": item.similarity_score,
|
||||
"rank_no": item.rank_no,
|
||||
"used_in_reasoning": 1 if item.used_in_reasoning else 0,
|
||||
"used_in_sql": 1 if item.used_in_sql else 0,
|
||||
}
|
||||
)
|
||||
with engine.begin() as conn:
|
||||
conn.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO chat_turn_retrieval (
|
||||
turn_id, item_type, item_id, item_extra,
|
||||
similarity_score, rank_no, used_in_reasoning, used_in_sql
|
||||
)
|
||||
VALUES (
|
||||
:turn_id, :item_type, :item_id, :item_extra,
|
||||
:similarity_score, :rank_no, :used_in_reasoning, :used_in_sql
|
||||
)
|
||||
"""
|
||||
),
|
||||
params_list,
|
||||
)
|
||||
return len(retrievals)
|
||||
|
||||
|
||||
def list_retrievals(turn_id: int) -> List[Dict[str, Any]]:
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
rows = conn.execute(
|
||||
text(
|
||||
"SELECT * FROM chat_turn_retrieval WHERE turn_id=:turn_id ORDER BY created_at ASC, rank_no ASC"
|
||||
),
|
||||
{"turn_id": turn_id},
|
||||
).fetchall()
|
||||
results: List[Dict[str, Any]] = []
|
||||
for row in rows:
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["item_extra"])
|
||||
data["used_in_reasoning"] = bool(data.get("used_in_reasoning"))
|
||||
data["used_in_sql"] = bool(data.get("used_in_sql"))
|
||||
results.append(data)
|
||||
return results
|
||||
|
||||
|
||||
# Metric registry
|
||||
def _metric_sql_hash(sql_text: str) -> str:
|
||||
"""Compute a stable hash to detect SQL definition changes."""
|
||||
return hashlib.md5(sql_text.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def create_metric(payload: MetricCreate) -> Dict[str, Any]:
|
||||
"""Insert a new metric definition; version starts at 1."""
|
||||
engine = get_engine()
|
||||
now = datetime.utcnow()
|
||||
sql_hash = _metric_sql_hash(payload.base_sql)
|
||||
params = {
|
||||
"metric_code": payload.metric_code,
|
||||
"metric_name": payload.metric_name,
|
||||
"metric_aliases": _json_dump(payload.metric_aliases),
|
||||
"biz_domain": payload.biz_domain,
|
||||
"biz_desc": payload.biz_desc,
|
||||
"chat_turn_id": payload.chat_turn_id,
|
||||
"tech_desc": payload.tech_desc,
|
||||
"formula_expr": payload.formula_expr,
|
||||
"base_sql": payload.base_sql,
|
||||
"time_grain": payload.time_grain,
|
||||
"dim_binding": _json_dump(payload.dim_binding),
|
||||
"update_strategy": payload.update_strategy,
|
||||
"schedule_id": payload.schedule_id,
|
||||
"schedule_type": payload.schedule_type,
|
||||
"version": 1,
|
||||
"is_active": 1 if payload.is_active else 0,
|
||||
"sql_hash": sql_hash,
|
||||
"created_by": payload.created_by,
|
||||
"updated_by": payload.updated_by,
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
}
|
||||
with engine.begin() as conn:
|
||||
result = conn.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO metric_def (
|
||||
metric_code, metric_name, metric_aliases, biz_domain, biz_desc,
|
||||
chat_turn_id, tech_desc, formula_expr, base_sql,
|
||||
time_grain, dim_binding, update_strategy,
|
||||
schedule_id, schedule_type, version, is_active,
|
||||
sql_hash, created_by, updated_by, created_at, updated_at
|
||||
)
|
||||
VALUES (
|
||||
:metric_code, :metric_name, :metric_aliases, :biz_domain, :biz_desc,
|
||||
:chat_turn_id, :tech_desc, :formula_expr, :base_sql,
|
||||
:time_grain, :dim_binding, :update_strategy,
|
||||
:schedule_id, :schedule_type, :version, :is_active,
|
||||
:sql_hash, :created_by, :updated_by, :created_at, :updated_at
|
||||
)
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
metric_id = result.lastrowid
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM metric_def WHERE id=:id"), {"id": metric_id}
|
||||
).first()
|
||||
if not row:
|
||||
raise RuntimeError("Failed to create metric definition.")
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["metric_aliases", "dim_binding"])
|
||||
data["is_active"] = bool(data.get("is_active"))
|
||||
return data
|
||||
|
||||
|
||||
def update_metric(metric_id: int, payload: MetricUpdate) -> Dict[str, Any]:
|
||||
"""Update mutable fields of a metric definition and refresh sql_hash when needed."""
|
||||
updates: Dict[str, Any] = {}
|
||||
for field in (
|
||||
"metric_name",
|
||||
"biz_domain",
|
||||
"biz_desc",
|
||||
"tech_desc",
|
||||
"formula_expr",
|
||||
"base_sql",
|
||||
"time_grain",
|
||||
"update_strategy",
|
||||
"schedule_id",
|
||||
"schedule_type",
|
||||
"updated_by",
|
||||
):
|
||||
value = getattr(payload, field)
|
||||
if value is not None:
|
||||
updates[field] = value
|
||||
|
||||
if payload.metric_aliases is not None:
|
||||
updates["metric_aliases"] = _json_dump(payload.metric_aliases)
|
||||
if payload.dim_binding is not None:
|
||||
updates["dim_binding"] = _json_dump(payload.dim_binding)
|
||||
if payload.is_active is not None:
|
||||
updates["is_active"] = 1 if payload.is_active else 0
|
||||
if payload.base_sql is not None:
|
||||
updates["sql_hash"] = _metric_sql_hash(payload.base_sql)
|
||||
|
||||
if not updates:
|
||||
current = get_metric(metric_id)
|
||||
if not current:
|
||||
raise KeyError(f"Metric {metric_id} not found.")
|
||||
return current
|
||||
|
||||
updates["updated_at"] = datetime.utcnow()
|
||||
set_clause = ", ".join(f"{key}=:{key}" for key in updates.keys())
|
||||
params = dict(updates)
|
||||
params["id"] = metric_id
|
||||
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
conn.execute(
|
||||
text(f"UPDATE metric_def SET {set_clause} WHERE id=:id"),
|
||||
params,
|
||||
)
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM metric_def WHERE id=:id"), {"id": metric_id}
|
||||
).first()
|
||||
if not row:
|
||||
raise KeyError(f"Metric {metric_id} not found.")
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["metric_aliases", "dim_binding"])
|
||||
data["is_active"] = bool(data.get("is_active"))
|
||||
return data
|
||||
|
||||
|
||||
def get_metric(metric_id: int) -> Optional[Dict[str, Any]]:
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM metric_def WHERE id=:id"), {"id": metric_id}
|
||||
).first()
|
||||
if not row:
|
||||
return None
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["metric_aliases", "dim_binding"])
|
||||
data["is_active"] = bool(data.get("is_active"))
|
||||
return data
|
||||
|
||||
|
||||
def list_metrics(
|
||||
*,
|
||||
biz_domain: Optional[str] = None,
|
||||
is_active: Optional[bool] = None,
|
||||
keyword: Optional[str] = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""List metric definitions with simple filters and pagination."""
|
||||
conditions = []
|
||||
params: Dict[str, Any] = {"limit": limit, "offset": offset}
|
||||
if biz_domain:
|
||||
conditions.append("biz_domain=:biz_domain")
|
||||
params["biz_domain"] = biz_domain
|
||||
if is_active is not None:
|
||||
conditions.append("is_active=:is_active")
|
||||
params["is_active"] = 1 if is_active else 0
|
||||
if keyword:
|
||||
conditions.append("(metric_code LIKE :kw OR metric_name LIKE :kw)")
|
||||
params["kw"] = f"%{keyword}%"
|
||||
|
||||
where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
rows = conn.execute(
|
||||
text(
|
||||
f"SELECT * FROM metric_def {where_clause} "
|
||||
"ORDER BY updated_at DESC LIMIT :limit OFFSET :offset"
|
||||
),
|
||||
params,
|
||||
).fetchall()
|
||||
results: List[Dict[str, Any]] = []
|
||||
for row in rows:
|
||||
data = _row_to_dict(row)
|
||||
_parse_json_fields(data, ["metric_aliases", "dim_binding"])
|
||||
data["is_active"] = bool(data.get("is_active"))
|
||||
results.append(data)
|
||||
return results
|
||||
|
||||
|
||||
# Metric schedules
|
||||
def create_metric_schedule(payload: MetricScheduleCreate) -> Dict[str, Any]:
|
||||
"""Create a schedule record for a metric."""
|
||||
engine = get_engine()
|
||||
params = {
|
||||
"metric_id": payload.metric_id,
|
||||
"cron_expr": payload.cron_expr,
|
||||
"enabled": 1 if payload.enabled else 0,
|
||||
"priority": payload.priority,
|
||||
"backfill_allowed": 1 if payload.backfill_allowed else 0,
|
||||
"max_runtime_sec": payload.max_runtime_sec,
|
||||
"retry_times": payload.retry_times,
|
||||
"owner_team": payload.owner_team,
|
||||
"owner_user_id": payload.owner_user_id,
|
||||
}
|
||||
with engine.begin() as conn:
|
||||
result = conn.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO metric_schedule (
|
||||
metric_id, cron_expr, enabled, priority,
|
||||
backfill_allowed, max_runtime_sec, retry_times,
|
||||
owner_team, owner_user_id
|
||||
) VALUES (
|
||||
:metric_id, :cron_expr, :enabled, :priority,
|
||||
:backfill_allowed, :max_runtime_sec, :retry_times,
|
||||
:owner_team, :owner_user_id
|
||||
)
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
schedule_id = result.lastrowid
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM metric_schedule WHERE id=:id"), {"id": schedule_id}
|
||||
).first()
|
||||
if not row:
|
||||
raise RuntimeError("Failed to create metric schedule.")
|
||||
data = _row_to_dict(row)
|
||||
data["enabled"] = bool(data.get("enabled"))
|
||||
data["backfill_allowed"] = bool(data.get("backfill_allowed"))
|
||||
return data
|
||||
|
||||
|
||||
def update_metric_schedule(schedule_id: int, payload: MetricScheduleUpdate) -> Dict[str, Any]:
|
||||
updates: Dict[str, Any] = {}
|
||||
for field in (
|
||||
"cron_expr",
|
||||
"priority",
|
||||
"max_runtime_sec",
|
||||
"retry_times",
|
||||
"owner_team",
|
||||
"owner_user_id",
|
||||
):
|
||||
value = getattr(payload, field)
|
||||
if value is not None:
|
||||
updates[field] = value
|
||||
if payload.enabled is not None:
|
||||
updates["enabled"] = 1 if payload.enabled else 0
|
||||
if payload.backfill_allowed is not None:
|
||||
updates["backfill_allowed"] = 1 if payload.backfill_allowed else 0
|
||||
|
||||
if not updates:
|
||||
current = list_schedules_for_metric(schedule_id=schedule_id)
|
||||
if current:
|
||||
return current[0]
|
||||
raise KeyError(f"Schedule {schedule_id} not found.")
|
||||
|
||||
set_clause = ", ".join(f"{key}=:{key}" for key in updates.keys())
|
||||
params = dict(updates)
|
||||
params["id"] = schedule_id
|
||||
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
conn.execute(
|
||||
text(f"UPDATE metric_schedule SET {set_clause} WHERE id=:id"),
|
||||
params,
|
||||
)
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM metric_schedule WHERE id=:id"), {"id": schedule_id}
|
||||
).first()
|
||||
if not row:
|
||||
raise KeyError(f"Schedule {schedule_id} not found.")
|
||||
data = _row_to_dict(row)
|
||||
data["enabled"] = bool(data.get("enabled"))
|
||||
data["backfill_allowed"] = bool(data.get("backfill_allowed"))
|
||||
return data
|
||||
|
||||
|
||||
def list_schedules_for_metric(metric_id: Optional[int] = None, schedule_id: Optional[int] = None) -> List[Dict[str, Any]]:
|
||||
conditions = []
|
||||
params: Dict[str, Any] = {}
|
||||
if metric_id is not None:
|
||||
conditions.append("metric_id=:metric_id")
|
||||
params["metric_id"] = metric_id
|
||||
if schedule_id is not None:
|
||||
conditions.append("id=:id")
|
||||
params["id"] = schedule_id
|
||||
where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
rows = conn.execute(
|
||||
text(f"SELECT * FROM metric_schedule {where_clause} ORDER BY id DESC"),
|
||||
params,
|
||||
).fetchall()
|
||||
results: List[Dict[str, Any]] = []
|
||||
for row in rows:
|
||||
data = _row_to_dict(row)
|
||||
data["enabled"] = bool(data.get("enabled"))
|
||||
data["backfill_allowed"] = bool(data.get("backfill_allowed"))
|
||||
results.append(data)
|
||||
return results
|
||||
|
||||
|
||||
# Metric runs
|
||||
def trigger_metric_run(payload: MetricRunTrigger) -> Dict[str, Any]:
|
||||
"""Create a metric_job_run entry; execution is orchestrated elsewhere."""
|
||||
metric = get_metric(payload.metric_id)
|
||||
if not metric:
|
||||
raise KeyError(f"Metric {payload.metric_id} not found.")
|
||||
metric_version = payload.metric_version or metric.get("version", 1)
|
||||
base_sql_snapshot = payload.base_sql_snapshot or metric.get("base_sql")
|
||||
triggered_at = payload.triggered_at or datetime.utcnow()
|
||||
|
||||
params = {
|
||||
"metric_id": payload.metric_id,
|
||||
"schedule_id": payload.schedule_id,
|
||||
"source_turn_id": payload.source_turn_id,
|
||||
"data_time_from": payload.data_time_from,
|
||||
"data_time_to": payload.data_time_to,
|
||||
"metric_version": metric_version,
|
||||
"base_sql_snapshot": base_sql_snapshot,
|
||||
"status": "RUNNING",
|
||||
"error_msg": None,
|
||||
"affected_rows": None,
|
||||
"runtime_ms": None,
|
||||
"triggered_by": payload.triggered_by,
|
||||
"triggered_at": triggered_at,
|
||||
"started_at": None,
|
||||
"finished_at": None,
|
||||
}
|
||||
engine = get_engine()
|
||||
with engine.begin() as conn:
|
||||
result = conn.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO metric_job_run (
|
||||
metric_id, schedule_id, source_turn_id,
|
||||
data_time_from, data_time_to, metric_version,
|
||||
base_sql_snapshot, status, error_msg,
|
||||
affected_rows, runtime_ms,
|
||||
triggered_by, triggered_at, started_at, finished_at
|
||||
) VALUES (
|
||||
:metric_id, :schedule_id, :source_turn_id,
|
||||
:data_time_from, :data_time_to, :metric_version,
|
||||
:base_sql_snapshot, :status, :error_msg,
|
||||
:affected_rows, :runtime_ms,
|
||||
:triggered_by, :triggered_at, :started_at, :finished_at
|
||||
)
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
run_id = result.lastrowid
|
||||
row = conn.execute(
|
||||
text("SELECT * FROM metric_job_run WHERE id=:id"), {"id": run_id}
|
||||
).first()
|
||||
if not row:
|
||||
raise RuntimeError("Failed to create metric job run.")
|
||||
return _row_to_dict(row)
|
||||
|
||||
|
||||
def get_metric_run(run_id: int) -> Optional[Dict[str, Any]]:
|
||||
    engine = get_engine()
    with engine.begin() as conn:
        row = conn.execute(
            text("SELECT * FROM metric_job_run WHERE id=:id"), {"id": run_id}
        ).first()
    if not row:
        return None
    return _row_to_dict(row)


def list_metric_runs(
    *,
    metric_id: Optional[int] = None,
    status: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict[str, Any]]:
    conditions = []
    params: Dict[str, Any] = {"limit": limit, "offset": offset}
    if metric_id is not None:
        conditions.append("metric_id=:metric_id")
        params["metric_id"] = metric_id
    if status is not None:
        conditions.append("status=:status")
        params["status"] = status
    where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
    engine = get_engine()
    with engine.begin() as conn:
        rows = conn.execute(
            text(
                f"SELECT * FROM metric_job_run {where_clause} "
                "ORDER BY triggered_at DESC LIMIT :limit OFFSET :offset"
            ),
            params,
        ).fetchall()
    return [_row_to_dict(row) for row in rows]


# Metric results
def write_metric_results(payload: MetricResultsWriteRequest) -> int:
    """Bulk insert metric_result rows for a metric/version."""
    metric = get_metric(payload.metric_id)
    if not metric:
        raise KeyError(f"Metric {payload.metric_id} not found.")
    default_version = metric.get("version", 1)
    now = datetime.utcnow()
    rows: List[Dict[str, Any]] = []
    for item in payload.results:
        rows.append(
            {
                "metric_id": payload.metric_id,
                "metric_version": item.metric_version or default_version,
                "stat_time": item.stat_time,
                "extra_dims": _json_dump(item.extra_dims),
                "metric_value": item.metric_value,
                "load_time": item.load_time or now,
                "data_version": item.data_version,
            }
        )
    if not rows:
        return 0
    engine = get_engine()
    with engine.begin() as conn:
        conn.execute(
            text(
                """
                INSERT INTO metric_result (
                    metric_id, metric_version, stat_time,
                    extra_dims, metric_value, load_time, data_version
                ) VALUES (
                    :metric_id, :metric_version, :stat_time,
                    :extra_dims, :metric_value, :load_time, :data_version
                )
                """
            ),
            rows,
        )
    return len(rows)


def query_metric_results(
    *,
    metric_id: int,
    stat_from: Optional[datetime] = None,
    stat_to: Optional[datetime] = None,
    limit: int = 200,
    offset: int = 0,
) -> List[Dict[str, Any]]:
    conditions = ["metric_id=:metric_id"]
    params: Dict[str, Any] = {
        "metric_id": metric_id,
        "limit": limit,
        "offset": offset,
    }
    if stat_from is not None:
        conditions.append("stat_time>=:stat_from")
        params["stat_from"] = stat_from
    if stat_to is not None:
        conditions.append("stat_time<=:stat_to")
        params["stat_to"] = stat_to

    where_clause = "WHERE " + " AND ".join(conditions)
    engine = get_engine()
    with engine.begin() as conn:
        rows = conn.execute(
            text(
                f"SELECT * FROM metric_result {where_clause} "
                "ORDER BY stat_time DESC LIMIT :limit OFFSET :offset"
            ),
            params,
        ).fetchall()
    results: List[Dict[str, Any]] = []
    for row in rows:
        data = _row_to_dict(row)
        _parse_json_fields(data, ["extra_dims"])
        results.append(data)
    return results


def latest_metric_result(metric_id: int) -> Optional[Dict[str, Any]]:
    engine = get_engine()
    with engine.begin() as conn:
        row = conn.execute(
            text(
                """
                SELECT * FROM metric_result
                WHERE metric_id=:metric_id
                ORDER BY stat_time DESC
                LIMIT 1
                """
            ),
            {"metric_id": metric_id},
        ).first()
    if not row:
        return None
    data = _row_to_dict(row)
    _parse_json_fields(data, ["extra_dims"])
    return data
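Taken together, these helpers form a small write/read path over metric_job_run and metric_result. A minimal usage sketch follows; `MetricResultItem` is a hypothetical name for the result-item model (its field names are taken from the row dict assembled in write_metric_results above):

from datetime import datetime

# Hypothetical item model name; fields mirror the rows built above.
payload = MetricResultsWriteRequest(
    metric_id=42,
    results=[
        MetricResultItem(
            metric_version=None,            # falls back to the metric's own version
            stat_time=datetime(2025, 1, 1),
            extra_dims={"district": "黄浦区"},
            metric_value=300.0,
            load_time=None,                 # falls back to datetime.utcnow()
            data_version="v1",
        )
    ],
)
inserted = write_metric_results(payload)    # number of rows inserted
latest = latest_metric_result(42)           # newest row by stat_time, extra_dims parsed
page = query_metric_results(metric_id=42, limit=50)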
@ -24,6 +24,7 @@ from app.services import LLMGateway
from app.settings import DEFAULT_IMPORT_MODEL
from app.services.import_analysis import (
    IMPORT_GATEWAY_BASE_URL,
    build_import_gateway_headers,
    resolve_provider_from_model,
)
from app.utils.llm_usage import extract_usage as extract_llm_usage
@ -532,6 +533,7 @@ async def _call_chat_completions(
    temperature: float = 0.2,
    timeout_seconds: Optional[float] = None,
) -> Any:
    # Normalize model spec to provider+model and issue the unified chat call.
    provider, model_name = resolve_provider_from_model(model_spec)
    payload = {
        "provider": provider.value,
@ -545,16 +547,17 @@ async def _call_chat_completions(
    payload_size_bytes = len(json.dumps(payload, ensure_ascii=False).encode("utf-8"))

    url = f"{IMPORT_GATEWAY_BASE_URL.rstrip('/')}/v1/chat/completions"
    headers = build_import_gateway_headers()
    try:
        # Log the whole request for traceability.
        logger.info(
            "Calling chat completions API %s with model %s and size %s and payload %s",
            "Calling chat completions API %s with model=%s payload_size=%sB",
            url,
            model_name,
            payload_size_bytes,
            payload,
        )
        response = await client.post(url, json=payload, timeout=timeout_seconds)
        response = await client.post(
            url, json=payload, timeout=timeout_seconds, headers=headers
        )

        response.raise_for_status()
    except httpx.HTTPError as exc:
@ -703,6 +706,7 @@ async def _run_action_with_callback(
    input_payload: Any = None,
    model_spec: Optional[str] = None,
) -> Any:
    # Execute a pipeline action and always emit a callback capturing success/failure.
    if input_payload is not None:
        logger.info(
            "Pipeline action %s input: %s",
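build_import_gateway_headers() is imported above, but its body is outside this diff. A plausible minimal shape, assuming it simply forwards the NEW_API_AUTH_TOKEN setting (added in the settings hunk below) as a Bearer credential; this is a sketch, not the repo's confirmed implementation:

from typing import Dict

from app.settings import NEW_API_AUTH_TOKEN  # introduced in the settings hunk below


def build_import_gateway_headers() -> Dict[str, str]:
    # Sketch only: attach auth solely when the new-api component enforces it.
    headers: Dict[str, str] = {}
    if NEW_API_AUTH_TOKEN:
        headers["Authorization"] = f"Bearer {NEW_API_AUTH_TOKEN}"
    return headers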
@ -38,6 +38,13 @@ def _prepare_table_schema(value: Any) -> str:
    return json.dumps(value, ensure_ascii=False)


def _prepare_model_params(params: Dict[str, Any] | None) -> str | None:
    if not params:
        return None
    serialized, _ = _serialize_json(params)
    return serialized


def _collect_common_columns(request: TableSnippetUpsertRequest) -> Dict[str, Any]:
    logger.debug(
        "Collecting common columns for table_id=%s version_ts=%s action_type=%s",
@ -53,6 +60,8 @@ def _collect_common_columns(request: TableSnippetUpsertRequest) -> Dict[str, Any]
        "callback_url": str(request.callback_url),
        "table_schema_version_id": request.table_schema_version_id,
        "table_schema": _prepare_table_schema(request.table_schema),
        "model": request.model,
        "model_provider": request.model_provider,
    }

    payload.update(
@ -72,6 +81,8 @@ def _collect_common_columns(request: TableSnippetUpsertRequest) -> Dict[str, Any]
        }
    )

    payload["model_params"] = _prepare_model_params(request.model_params)

    if request.llm_usage is not None:
        llm_usage_json, _ = _serialize_json(request.llm_usage)
        if llm_usage_json is not None:
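For reference, _prepare_model_params collapses absent or empty overrides to NULL and stores anything else as a JSON string. A quick behavioral sketch (assuming _serialize_json returns a (json_string, error) pair, as its call sites here suggest; exact output spacing depends on that helper):

print(_prepare_model_params(None))                  # None -> stored as SQL NULL
print(_prepare_model_params({}))                    # empty dict is falsy -> NULL as well
print(_prepare_model_params({"temperature": 0.2}))  # e.g. '{"temperature": 0.2}'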
@ -20,7 +20,11 @@ PROVIDER_KEY_ENV_MAP: Dict[str, str] = {
}


DEFAULT_IMPORT_MODEL = os.getenv("DEFAULT_IMPORT_MODEL", "openai:gpt-4.1-mini")
DEFAULT_IMPORT_MODEL = os.getenv("DEFAULT_IMPORT_MODEL", "deepseek:deepseek-chat")
NEW_API_BASE_URL = os.getenv("NEW_API_BASE_URL")
NEW_API_AUTH_TOKEN = os.getenv("NEW_API_AUTH_TOKEN")
RAG_API_BASE_URL = os.getenv("RAG_API_BASE_URL", "http://127.0.0.1:8000")
RAG_API_AUTH_TOKEN = os.getenv("RAG_API_AUTH_TOKEN")


@lru_cache(maxsize=1)
@ -1,41 +0,0 @@
{
  "provider": "deepseek",
  "model": "deepseek-chat",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "```json\n{\n \"table_name\": \"national_brand_sales\",\n \"description\": \"全国品牌系统外销售数据\",\n \"columns\": [\n {\n \"original_name\": \"品牌\",\n \"standard_name\": \"brand\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(50)\",\n \"java_type\": \"string\",\n \"nullable\": true,\n \"distinct_count_sample\": 5,\n \"null_ratio_sample\": 0.4,\n \"is_enum_candidate\": false,\n \"description\": \"品牌名称\",\n \"date_format\": null\n },\n {\n \"original_name\": \"产品价类\",\n \"standard_name\": \"price_category\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(10)\",\n \"java_type\": \"string\",\n \"nullable\": false,\n \"distinct_count_sample\": 3,\n \"null_ratio_sample\": 0.0,\n \"is_enum_candidate\": true,\n \"description\": \"产品价格分类(一类/二类/三类)\",\n \"date_format\": null\n },\n {\n \"original_name\": \"是否重点品牌"
      }
    }
  ],
  "raw": {
    "id": "67f3cc80-38bc-4bb7-b336-48d4886722c4",
    "object": "chat.completion",
    "created": 1761752207,
    "model": "deepseek-chat",
    "choices": [
      {
        "index": 0,
        "message": {
          "role": "assistant",
          "content": "```json\n{\n \"table_name\": \"national_brand_sales\",\n \"description\": \"全国品牌系统外销售数据\",\n \"columns\": [\n {\n \"original_name\": \"品牌\",\n \"standard_name\": \"brand\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(50)\",\n \"java_type\": \"string\",\n \"nullable\": true,\n \"distinct_count_sample\": 5,\n \"null_ratio_sample\": 0.4,\n \"is_enum_candidate\": false,\n \"description\": \"品牌名称\",\n \"date_format\": null\n },\n {\n \"original_name\": \"产品价类\",\n \"standard_name\": \"price_category\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(10)\",\n \"java_type\": \"string\",\n \"nullable\": false,\n \"distinct_count_sample\": 3,\n \"null_ratio_sample\": 0.0,\n \"is_enum_candidate\": true,\n \"description\": \"产品价格分类(一类/二类/三类)\",\n \"date_format\": null\n },\n {\n \"original_name\": \"是否重点品牌"
        },
        "logprobs": null,
        "finish_reason": "length"
      }
    ],
    "usage": {
      "prompt_tokens": 1078,
      "completion_tokens": 256,
      "total_tokens": 1334,
      "prompt_tokens_details": {
        "cached_tokens": 1024
      },
      "prompt_cache_hit_tokens": 1024,
      "prompt_cache_miss_tokens": 54
    },
    "system_fingerprint": "fp_ffc7281d48_prod0820_fp8_kvcache"
  }
}
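The deleted sample above also shows why raw provider responses need defensive parsing: the assistant content is a ```json-fenced string, and finish_reason is "length", so the JSON body is cut off mid-field. A minimal sketch of a tolerant parser for such payloads (helper name is illustrative, not part of the repo):

import json
import re


def parse_fenced_json(content: str):
    """Strip an optional ```json fence and best-effort parse the body."""
    match = re.search(r"```(?:json)?\s*(.*?)\s*(?:```|$)", content, re.DOTALL)
    body = match.group(1) if match else content
    try:
        return json.loads(body)
    except json.JSONDecodeError:
        # finish_reason == "length" means the completion was truncated,
        # so the body may be incomplete -- exactly the case in this sample.
        return None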
1 demo/水务/水务-gemini2.5-ge-result.json Normal file
@ -0,0 +1 @@
{"role": "dimension", "time": {"range": null, "column": null, "has_gaps": null, "granularity": "unknown"}, "grain": ["service_point_id"], "table": "data-ge.water_meter_info", "columns": [{"name": "meter_subtype", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 9, "distinct_ratio": 0.03, "pk_candidate_score": 0.03, "metric_candidate_score": 0.0}, {"name": "installation_position", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 4, "distinct_ratio": 0.013333333333333334, "pk_candidate_score": 0.013333333333333334, "metric_candidate_score": 0.0}, {"name": "supply_office", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 11, "distinct_ratio": 0.03666666666666667, "pk_candidate_score": 0.03666666666666667, "metric_candidate_score": 0.0}, {"name": "meter_diameter", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 8, "distinct_ratio": 0.02666666666666667, "pk_candidate_score": 0.02666666666666667, "metric_candidate_score": 0.0}, {"name": "account_id", "dtype": "unknown", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "该列的统计指标(如空值率、唯一性)缺失,但根据命名规则推断为ID。", "enumish": null, "null_rate": null, "top_values": [], "semantic_type": "id", "distinct_count": null, "distinct_ratio": null, "pk_candidate_score": 0.9, "metric_candidate_score": 0.0}, {"name": "service_point_id", "dtype": "unknown", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "该列的统计指标(如空值率、唯一性)缺失,但根据命名规则推断为ID。", "enumish": null, "null_rate": null, "top_values": [], "semantic_type": "id", "distinct_count": null, "distinct_ratio": null, "pk_candidate_score": 0.95, "metric_candidate_score": 0.0}, {"name": "station", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 36, "distinct_ratio": 0.12, "pk_candidate_score": 0.12, "metric_candidate_score": 0.0}, {"name": "meter_type", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 5, "distinct_ratio": 0.016666666666666666, "pk_candidate_score": 0.016666666666666666, "metric_candidate_score": 0.0}, {"name": "district", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 13, "distinct_ratio": 0.043333333333333335, "pk_candidate_score": 0.043333333333333335, "metric_candidate_score": 0.0}, {"name": "meter_status", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "该列只有一个唯一值 '有效'。", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 1, "distinct_ratio": 0.0033333333333333335, "pk_candidate_score": 0.0033333333333333335, "metric_candidate_score": 0.0}], "quality": {"warning_hints": ["列 'meter_status' 只有一个唯一值 '有效',可能为常量列。"], "failed_expectations": []}, "row_count": 300, "fk_candidates": [], "confidence_notes": ["表角色(role)被推断为 'dimension',因为其列几乎完全由ID和类别属性构成,且缺少数值指标或时间序列列。", "主键候选(primary_key_candidates) 'service_point_id' 和 'account_id' 是基于命名约定(包含'_id')推断的。其唯一性和非空性未在GE结果中直接度量,因此这是一个高置信度的猜测。", "表粒度(grain)可能为 'service_point',与推断的主键 'service_point_id' 相对应。", "未根据列名或数据格式识别出时间列。"], "primary_key_candidates": [["service_point_id"], ["account_id"]]}
180 demo/水务/水务-gemini2.5-snippet-alias.json Normal file
@ -0,0 +1,180 @@
[
  {"id": "snpt_count-service-points-by-dimension",
   "aliases": [{"text": "各个区有多少水表", "tone": "口语"}, {"text": "按维度统计用水点数", "tone": "中性"}, {"text": "各维度用水点数量分布", "tone": "专业"}],
   "keywords": ["用水点数", "service_point_count", "数量", "统计", "汇总", "aggregate", "维度", "dimension", "区域", "district", "供水所", "分组统计", "水表"],
   "intent_tags": ["aggregate", "by_dimension"]},
  {"id": "snpt_topn-service-points-by-dimension",
   "aliases": [{"text": "哪个地方水表最多", "tone": "口语"}, {"text": "用水点数Top-N排名", "tone": "中性"}, {"text": "Top-N用水点数维度排行", "tone": "专业"}],
   "keywords": ["Top-N", "top", "排名", "排行", "ranking", "最多", "用水点数", "service_point_count", "维度", "dimension", "站点", "station", "水表"],
   "intent_tags": ["topn", "by_dimension"]},
  {"id": "snpt_ratio-service-points-by-dimension",
   "aliases": [{"text": "各种水表各占多少", "tone": "口语"}, {"text": "各维度用水点数占比", "tone": "中性"}, {"text": "用水点维度构成分析", "tone": "专业"}],
   "keywords": ["占比", "percentage", "百分比", "ratio", "构成", "分布", "用水点数", "水表类型", "meter_type", "维度", "dimension", "水表"],
   "intent_tags": ["ratio", "by_dimension"]},
  {"id": "snpt_quality-check-duplicate-spid",
   "aliases": [{"text": "有没有重复的水表号", "tone": "口语"}, {"text": "检查重复的用水点ID", "tone": "中性"}, {"text": "用水点ID唯一性校验", "tone": "专业"}],
   "keywords": ["数据质量", "quality", "检查", "校验", "重复", "duplicate", "唯一性", "uniqueness", "用水点ID", "service_point_id", "异常检测", "主键"],
   "intent_tags": ["quality", "by_dimension"]},
  {"id": "snpt_sample-filter-service-points-by-dims",
   "aliases": [{"text": "给我看城区的机械表", "tone": "口语"}, {"text": "按多维度筛选用水点", "tone": "中性"}, {"text": "多维组合条件过滤用水点", "tone": "专业"}],
   "keywords": ["筛选", "过滤", "filter", "查询", "明细", "列表", "sample", "用水点", "区域", "district", "水表类型", "meter_type", "条件查询"],
   "intent_tags": ["sample", "filter"]}
]
186 demo/水务/水务-gemini2.5-snippet.json Normal file
@ -0,0 +1,186 @@
[
  {"id": "snpt_count-service-points-by-dimension", "desc": "按指定维度(如区域、供水所)分组,统计各分类下的用水点数量。", "type": "aggregate", "title": "按维度统计用水点数",
   "examples": ["按区域统计用水点数量", "各个供水所分别有多少个用水点"],
   "variables": [{"name": "dimension_column", "type": "column", "default": "district"}],
   "dialect_sql": {"mysql": "SELECT\n `${dimension_column}`,\n COUNT(DISTINCT service_point_id) AS service_point_count\nFROM\n `data-ge.water_meter_info`\nGROUP BY\n `${dimension_column}`\nORDER BY\n service_point_count DESC;"},
   "applicability": {"constraints": {"notes": ["适用于对水表档案信息进行分类汇总统计。", "可将变量 ${dimension_column} 替换为任一维度列,如 district, supply_office, station, meter_type 等。"], "fk_join_available": false, "dim_cardinality_hint": null}, "time_column": null, "required_columns": ["service_point_id"]},
   "business_caliber": "用水点数:对 `service_point_id` 进行去重计数,代表一个独立的服务点(通常对应一个水表)。统计粒度为“指定维度”。"},
  {"id": "snpt_topn-service-points-by-dimension", "desc": "按指定维度(如区域、站点)统计用水点数,并展示数量最多的前N个分类。", "type": "topn", "title": "Top-N 用水点数维度排名",
   "examples": ["哪个区域的用水点最多", "用水点数排名前5的站点是哪些"],
   "variables": [{"name": "dimension_column", "type": "column", "default": "station"}, {"name": "top_n", "type": "int", "default": 10}],
   "dialect_sql": {"mysql": "SELECT\n `${dimension_column}`,\n COUNT(DISTINCT service_point_id) AS service_point_count\nFROM\n `data-ge.water_meter_info`\nGROUP BY\n `${dimension_column}`\nORDER BY\n service_point_count DESC\nLIMIT ${top_n};"},
   "applicability": {"constraints": {"notes": ["维度 `station` 基数较高 (36),建议 Top-N 查询时结合业务场景合理设置 N 值。"], "fk_join_available": false, "dim_cardinality_hint": 36}, "time_column": null, "required_columns": ["service_point_id"]},
   "business_caliber": "用水点数:对 `service_point_id` 进行去重计数。排名依据为各维度分类下的用水点总数。统计粒度为“指定维度”。"},
  {"id": "snpt_ratio-service-points-by-dimension", "desc": "计算在指定维度下,各分类的用水点数占总用水点数的百分比,以分析其分布构成。", "type": "ratio", "title": "各维度用水点数占比",
   "examples": ["不同水表类型(meter_type)的分布情况", "各个区域的用水点占比是多少"],
   "variables": [{"name": "dimension_column", "type": "column", "default": "meter_type"}],
   "dialect_sql": {"mysql": "SELECT\n `${dimension_column}`,\n COUNT(DISTINCT service_point_id) AS service_point_count,\n COUNT(DISTINCT service_point_id) * 100.0 / SUM(COUNT(DISTINCT service_point_id)) OVER () AS percentage\nFROM\n `data-ge.water_meter_info`\nGROUP BY\n `${dimension_column}`\nORDER BY\n service_point_count DESC;"},
   "applicability": {"constraints": {"notes": ["SQL模板使用了窗口函数 SUM() OVER(),请确保MySQL版本支持(8.0+)。"], "fk_join_available": false, "dim_cardinality_hint": null}, "time_column": null, "required_columns": ["service_point_id"]},
   "business_caliber": "用水点数占比:某分类下的用水点数 / 总用水点数。用水点数以 `service_point_id` 去重计数。统计粒度为“指定维度”。"},
  {"id": "snpt_quality-check-duplicate-spid", "desc": "查找在用水点信息表中存在重复的 `service_point_id`,用于数据质量校验。", "type": "quality", "title": "检查重复的用水点ID",
   "examples": ["检查是否存在重复的水表档案", "校验用水点ID的唯一性"],
   "variables": [],
   "dialect_sql": {"mysql": "SELECT\n service_point_id,\n COUNT(*) AS occurrences\nFROM\n `data-ge.water_meter_info`\nGROUP BY\n service_point_id\nHAVING\n COUNT(*) > 1;"},
   "applicability": {"constraints": {"notes": ["预期返回结果为空。若有返回,则表示数据存在一致性问题,`service_point_id` 未能作为唯一主键。"], "fk_join_available": false, "dim_cardinality_hint": null}, "time_column": null, "required_columns": ["service_point_id"]},
   "business_caliber": "重复项:指 `service_point_id` 出现次数大于1的记录。此ID应为表的主键,理论上不应重复。"},
  {"id": "snpt_sample-filter-service-points-by-dims", "desc": "根据区域、水表类型、供水所等多个维度组合条件,筛选出符合条件的用水点明细。", "type": "sample", "title": "多维度筛选用水点列表",
   "examples": ["查询城区的机械表有哪些", "拉取某个供水所下特定口径水表的列表"],
   "variables": [{"name": "district_name", "type": "string", "default": "城区"}, {"name": "meter_type_name", "type": "string", "default": "机械表"}, {"name": "limit_num", "type": "int", "default": 100}],
   "dialect_sql": {"mysql": "SELECT\n service_point_id,\n account_id,\n district,\n supply_office,\n meter_type,\n meter_subtype,\n meter_diameter\nFROM\n `data-ge.water_meter_info`\nWHERE\n district = '${district_name}'\n AND meter_type = '${meter_type_name}'\n -- AND meter_status = '有效' -- 可选:根据画像,该列为常量'有效',可不加\nLIMIT ${limit_num};"},
   "applicability": {"constraints": {"notes": [], "fk_join_available": false, "dim_cardinality_hint": null}, "time_column": null, "required_columns": ["service_point_id", "account_id", "district", "supply_office", "meter_type", "meter_subtype", "meter_diameter"]},
   "business_caliber": "返回满足所有筛选条件的用水点明细信息。`meter_status` 列只有一个值 '有效',通常无需作为筛选条件。"}
]
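These snippets parameterize their MySQL with ${variable} placeholders and declare defaults under "variables". A minimal renderer sketch (function name is illustrative; string.Template happens to use the same ${name} syntax):

from string import Template


def render_snippet(snippet: dict, **overrides) -> str:
    """Fill a snippet's MySQL template from declared defaults plus overrides."""
    values = {v["name"]: v.get("default") for v in snippet["variables"]}
    values.update(overrides)
    return Template(snippet["dialect_sql"]["mysql"]).substitute(values)

# e.g. render_snippet(count_snippet, dimension_column="supply_office")

Note the templates interpolate raw identifiers and literals, so callers should whitelist column names rather than pass user input through directly.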
230 demo/水务/水务-gpt5-ge-desc.json Normal file
@ -0,0 +1,230 @@
{
  "role": "dimension",
  "time": {"range": null, "column": null, "has_gaps": null, "granularity": "unknown"},
  "grain": ["service_point_id"],
  "table": "data-ge.water_meter_info",
  "columns": [
    {"name": "supply_office", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;11 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 11, "distinct_ratio": 0.03666666666666667, "pk_candidate_score": 0.05, "metric_candidate_score": 0.0},
    {"name": "station", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;36 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 36, "distinct_ratio": 0.12, "pk_candidate_score": 0.1, "metric_candidate_score": 0.0},
    {"name": "district", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;13 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 13, "distinct_ratio": 0.043333333333333335, "pk_candidate_score": 0.05, "metric_candidate_score": 0.0},
    {"name": "meter_diameter", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;8 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 8, "distinct_ratio": 0.02666666666666667, "pk_candidate_score": 0.03, "metric_candidate_score": 0.0},
    {"name": "meter_status", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;单一取值(\"有效\")", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 1, "distinct_ratio": 0.0033333333333333335, "pk_candidate_score": 0.0, "metric_candidate_score": 0.0},
    {"name": "meter_subtype", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;9 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 9, "distinct_ratio": 0.03, "pk_candidate_score": 0.03, "metric_candidate_score": 0.0},
    {"name": "meter_type", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;5 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 5, "distinct_ratio": 0.016666666666666666, "pk_candidate_score": 0.02, "metric_candidate_score": 0.0},
    {"name": "installation_position", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;4 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 4, "distinct_ratio": 0.013333333333333334, "pk_candidate_score": 0.02, "metric_candidate_score": 0.0},
    {"name": "service_point_id", "dtype": "unknown", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "命名指示标识列;未提供唯一性或非空验证", "enumish": null, "null_rate": null, "top_values": [], "semantic_type": "id", "distinct_count": null, "distinct_ratio": null, "pk_candidate_score": 0.6, "metric_candidate_score": 0.05},
    {"name": "account_id", "dtype": "unknown", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "命名指示账户标识;未提供唯一性或非空验证", "enumish": null, "null_rate": null, "top_values": [], "semantic_type": "id", "distinct_count": null, "distinct_ratio": null, "pk_candidate_score": 0.5, "metric_candidate_score": 0.05}
  ],
  "quality": {"warning_hints": ["以下列未设置非空校验:service_point_id, account_id(空值情况未知)", "未识别到时间列"], "failed_expectations": []},
  "row_count": 300,
  "fk_candidates": [],
  "confidence_notes": ["role 判定为 dimension:表内列均为枚举/分类或ID,未发现数值型度量或时间列;34/34 期望均为分类枚举/非空与去重比例。", "grain 猜测为 service_point_id:仅依据命名启发式,缺少唯一性与非空度量佐证(置信度较低)。", "未识别时间列:列名与期望均未涉及日期/时间,也无最小/最大时间范围可推断。"],
  "primary_key_candidates": []
}
372 demo/水务/水务-gpt5-snippet-alias.json Normal file
@ -0,0 +1,372 @@
[
  {"id": "snpt_topn_station",
   "aliases": [{"text": "站点水表排行前N", "tone": "中性"}, {"text": "哪个站点表最多", "tone": "口语"}, {"text": "按站点水表TopN", "tone": "专业"}],
   "keywords": ["TopN", "排名", "排行", "station", "站点", "水表数", "meter count", "distinct", "去重", "聚合", "排序", "榜单"],
   "intent_tags": ["topn", "aggregate", "by_dimension"]},
  {"id": "snpt_share_district",
   "aliases": [{"text": "各辖区水表占比", "tone": "中性"}, {"text": "哪个辖区占比高", "tone": "口语"}, {"text": "按辖区水表比例", "tone": "专业"}],
   "keywords": ["占比", "ratio", "district", "辖区", "水表数", "meter count", "distinct", "去重", "百分比", "份额", "聚合", "排序", "分布"],
   "intent_tags": ["ratio", "aggregate", "by_dimension"]},
  {"id": "snpt_dist_diameter",
   "aliases": [{"text": "表径水表数分布", "tone": "中性"}, {"text": "不同口径有多少", "tone": "口语"}, {"text": "按表径去重计数", "tone": "专业"}],
   "keywords": ["分布", "distribution", "meter_diameter", "表径", "水表数", "meter count", "distinct", "去重", "聚合", "类别", "category", "条形图", "饼图", "排行"],
   "intent_tags": ["aggregate", "by_dimension"]},
  {"id": "snpt_type_subtype_matrix",
   "aliases": [{"text": "类型×子类水表数", "tone": "中性"}, {"text": "看各类型各子类", "tone": "口语"}, {"text": "类型子类组合统计", "tone": "专业"}],
   "keywords": ["类型", "type", "子类", "subtype", "组合", "matrix", "交叉分析", "cross-tab", "水表数", "meter count", "distinct", "去重", "分布", "聚合", "维度"],
   "intent_tags": ["aggregate", "by_dimension"]},
  {"id": "snpt_quality_spid_uniq",
   "aliases": [{"text": "服务点ID唯一性检", "tone": "专业"}, {"text": "服务点ID有重复吗", "tone": "口语"}, {"text": "服务点ID完整性评估", "tone": "中性"}],
   "keywords": ["质量检查", "quality", "唯一性", "uniqueness", "重复", "duplicate", "空值", "NULL", "完整性", "integrity", "service_point_id", "数据质量", "统计", "去重", "异常检测"],
   "intent_tags": ["quality"]},
  {"id": "snpt_quality_account_nulls",
   "aliases": [{"text": "账户ID缺失明细", "tone": "中性"}, {"text": "看看哪些账户为空", "tone": "口语"}, {"text": "account_id空值样本", "tone": "专业"}],
   "keywords": ["质量检查", "缺失", "missing", "空值", "NULL", "account_id", "样本", "sample", "抽样", "sampling", "明细", "排查", "过滤", "WHERE", "LIMIT"],
   "intent_tags": ["quality", "sample"]},
  {"id": "snpt_sample_random_rows",
   "aliases": [{"text": "随机抽样水表明细", "tone": "中性"}, {"text": "随机取几条看看", "tone": "口语"}, {"text": "RAND()样本抽取", "tone": "专业"}],
   "keywords": ["随机", "random", "样本", "sample", "抽样", "sampling", "明细", "details", "质检", "QA", "RAND()", "LIMIT", "抽取", "数据验证"],
   "intent_tags": ["sample"]},
  {"id": "snpt_filter_office_type_where",
   "aliases": [{"text": "按所与类型过滤有效", "tone": "专业"}, {"text": "筛选某所的指定类型", "tone": "中性"}, {"text": "只看这所的这种表", "tone": "口语"}],
   "keywords": ["过滤", "filter", "WHERE", "supply_office", "营业所", "meter_type", "类型", "meter_status", "有效", "条件片段", "筛选", "查询拼接", "字段", "约束"],
   "intent_tags": ["filter"]},
  {"id": "snpt_office_station_dist",
   "aliases": [{"text": "所站组合水表数", "tone": "中性"}, {"text": "各站在各所有多少", "tone": "口语"}, {"text": "营业所×站点分布", "tone": "专业"}],
   "keywords": ["supply_office", "营业所", "station", "站点", "层级", "hierarchy", "分布", "distribution", "水表数", "meter count", "distinct", "去重", "聚合", "交叉分析", "排行"],
   "intent_tags": ["aggregate", "by_dimension"]},
  {"id": "snpt_total_meter_baseline",
   "aliases": [{"text": "水表总量基线", "tone": "中性"}, {"text": "现在有多少水表", "tone": "口语"}, {"text": "全表去重总数", "tone": "专业"}],
   "keywords": ["总量", "total", "baseline", "基线", "水表总数", "meter total", "service_point_id", "distinct", "去重", "分母", "denominator", "占比", "聚合", "汇总", "snapshot"],
   "intent_tags": ["aggregate"]}
]
330 demo/水务/水务-gpt5-snippet.json Normal file
@ -0,0 +1,330 @@
[
  {"id": "snpt_topn_station", "desc": "按站点统计水表数量并取前N", "type": "topn", "title": "站点TopN水表数",
   "examples": ["各站点水表数量排名前10", "站点水表覆盖情况排行"],
   "variables": [{"name": "top_n", "type": "int", "default": 10}],
   "dialect_sql": {"mysql": "SELECT station,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY station\nORDER BY meter_cnt DESC\nLIMIT {{top_n}};"},
   "applicability": {"constraints": {"notes": ["TopN建议N<=36", "以service_point_id去重计数", "无时间列,无法做趋势"], "fk_join_available": false, "dim_cardinality_hint": 36}, "time_column": null, "required_columns": ["station", "service_point_id"]},
   "business_caliber": "水表数=按service_point_id去重计数;粒度=站点。仅统计当前表中的有效记录(不含时间口径)。安全限制:用于分析排名,避免扩大LIMIT造成全量导出。"},
  {"id": "snpt_share_district", "desc": "统计各辖区水表数及其占比", "type": "ratio", "title": "辖区水表占比",
   "examples": ["各辖区水表占比", "哪个辖区水表最多"],
   "variables": [],
   "dialect_sql": {"mysql": "WITH by_district AS (\n SELECT district, COUNT(DISTINCT service_point_id) AS meter_cnt\n FROM `data-ge`.`water_meter_info`\n GROUP BY district\n), tot AS (\n SELECT COUNT(DISTINCT service_point_id) AS total_cnt\n FROM `data-ge`.`water_meter_info`\n)\nSELECT b.district,\n b.meter_cnt,\n ROUND(b.meter_cnt / NULLIF(t.total_cnt, 0) * 100, 2) AS pct\nFROM by_district b\nCROSS JOIN tot t\nORDER BY pct DESC, b.district;"},
   "applicability": {"constraints": {"notes": ["占比分母为全表service_point_id去重总数", "service_point_id为空将被忽略"], "fk_join_available": false, "dim_cardinality_hint": 13}, "time_column": null, "required_columns": ["district", "service_point_id"]},
   "business_caliber": "水表数=按service_point_id去重计数;粒度=辖区。占比=辖区水表数/全表水表总数。安全限制:仅基于本表,不代表全市/全网口径;无时间维度。"},
  {"id": "snpt_dist_diameter", "desc": "按表径统计水表数量分布", "type": "aggregate", "title": "表径分布统计",
   "examples": ["不同口径水表有多少", "查看表径分布情况"],
   "variables": [],
   "dialect_sql": {"mysql": "SELECT meter_diameter,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY meter_diameter\nORDER BY meter_cnt DESC, meter_diameter;"},
   "applicability": {"constraints": {"notes": ["以service_point_id去重计数", "适合绘制条形图/饼图"], "fk_join_available": false, "dim_cardinality_hint": 8}, "time_column": null, "required_columns": ["meter_diameter", "service_point_id"]},
   "business_caliber": "水表数=按service_point_id去重计数;粒度=表径。安全限制:仅用于分布分析,不含时间过滤;避免用于明细导出。"},
  {"id": "snpt_type_subtype_matrix", "desc": "统计水表类型与子类组合的数量", "type": "aggregate", "title": "类型子类分布",
   "examples": ["不同类型与子类的水表数量", "查看类型与子类的组合分布"],
   "variables": [],
   "dialect_sql": {"mysql": "SELECT meter_type,\n meter_subtype,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY meter_type, meter_subtype\nORDER BY meter_cnt DESC, meter_type, meter_subtype;"},
   "applicability": {"constraints": {"notes": ["组合基数<=5×9=45", "以service_point_id去重计数"], "fk_join_available": false, "dim_cardinality_hint": 45}, "time_column": null, "required_columns": ["meter_type", "meter_subtype", "service_point_id"]},
   "business_caliber": "水表数=按service_point_id去重计数;粒度=类型×子类组合。安全限制:仅用于汇总分析,不包含时间或业务状态变化。"},
  {"id": "snpt_quality_spid_uniq", "desc": "评估service_point_id的空值与重复情况", "type": "quality", "title": "服务点唯一性检",
   "examples": ["检查服务点ID是否唯一", "统计service_point_id空值与重复情况"],
   "variables": [],
   "dialect_sql": {"mysql": "SELECT\n COUNT(*) AS total_rows,\n SUM(service_point_id IS NULL) AS null_cnt,\n COUNT(DISTINCT service_point_id) AS distinct_cnt,\n (COUNT(*) - COUNT(DISTINCT service_point_id)) AS duplicate_rows_est,\n (\n SELECT COUNT(*) FROM (\n SELECT service_point_id\n FROM `data-ge`.`water_meter_info`\n GROUP BY service_point_id\n HAVING COUNT(*) > 1\n ) AS dup\n ) AS dup_key_groups\nFROM `data-ge`.`water_meter_info`;"},
   "applicability": {"constraints": {"notes": ["用于键完整性检查", "重复行估算=总行数-去重数"], "fk_join_available": false, "dim_cardinality_hint": null}, "time_column": null, "required_columns": ["service_point_id"]},
   "business_caliber": "质量检查口径:在本表内评估service_point_id的非空与唯一性,不代表跨表全局唯一。安全限制:仅输出汇总指标,不暴露明细重复值。"},
  {"id": "snpt_quality_account_nulls", "desc": "抽取account_id为空的记录用于排查", "type": "quality", "title": "账户ID缺失明细",
   "examples": ["列出account_id为空的水表", "抽样查看账户缺失的数据行"],
   "variables": [{"name": "limit_n", "type": "int", "default": 50}],
   "dialect_sql": {"mysql": "SELECT *\nFROM `data-ge`.`water_meter_info`\nWHERE account_id IS NULL\nLIMIT {{limit_n}};"},
   "applicability": {"constraints": {"notes": ["明细仅限小样本抽取", "建议LIMIT<=100,避免全量导出"], "fk_join_available": false, "dim_cardinality_hint": null}, "time_column": null, "required_columns": ["account_id"]},
   "business_caliber": "质量抽样:筛出账户ID缺失的水表记录,便于核对。安全限制:仅用于样本排查,不建议在生产中全量导出;如需口径统计请改为COUNT聚合。"},
  {"id": "snpt_sample_random_rows", "desc": "随机抽取水表信息用于人工核验", "type": "sample", "title": "随机抽样明细",
   "examples": ["抽样查看水表信息", "随机抽取20条做质检"],
   "variables": [{"name": "sample_size", "type": "int", "default": 20}],
   "dialect_sql": {"mysql": "SELECT *\nFROM `data-ge`.`water_meter_info`\nORDER BY RAND()\nLIMIT {{sample_size}};"},
   "applicability": {"constraints": {"notes": ["使用RAND()随机,样本不可复现", "建议限制样本量"], "fk_join_available": false, "dim_cardinality_hint": 300}, "time_column": null, "required_columns": ["service_point_id"]},
   "business_caliber": "样本抽取:从本表随机返回若干行明细。安全限制:避免扩大LIMIT进行全量下载;如需可复现样本,请改用带种子的随机方法(MySQL不原生支持)。"},
  {"id": "snpt_filter_office_type_where", "desc": "常用WHERE筛选条件片段:按营业所与类型且为有效", "type": "sample", "title": "机构类型筛选片",
   "examples": ["筛选A营业所的机械表", "仅查看某营业所的指定类型水表"],
   "variables": [{"name": "supply_office", "type": "string"}, {"name": "meter_type", "type": "string"}],
   "dialect_sql": {"mysql": "WHERE supply_office = '{{supply_office}}'\n AND meter_type = '{{meter_type}}'\n AND meter_status = '有效'"},
   "applicability": {"constraints": {"notes": ["这是条件片段,可拼接到其他查询", "meter_status当前为单一值“有效”"], "fk_join_available": false, "dim_cardinality_hint": 11}, "time_column": null, "required_columns": ["supply_office", "meter_type", "meter_status"]},
   "business_caliber": "过滤口径:仅保留指定营业所与指定水表类型、且状态为“有效”的记录。安全限制:为片段用途,需拼接在SELECT…FROM之后使用。"},
  {"id": "snpt_office_station_dist", "desc": "按营业所与站点组合统计水表数", "type": "aggregate", "title": "所站层级分布",
   "examples": ["按营业所查看各站点水表数", "所站两级的水表分布情况"],
   "variables": [],
   "dialect_sql": {"mysql": "SELECT supply_office,\n station,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY supply_office, station\nORDER BY supply_office, meter_cnt DESC, station;"},
   "applicability": {"constraints": {"notes": ["组合基数<=11×36=396", "以service_point_id去重计数", "如结果过长可再按TopN筛选"], "fk_join_available": false, "dim_cardinality_hint": 396}, "time_column": null, "required_columns": ["supply_office", "station", "service_point_id"]},
   "business_caliber": "水表数=按service_point_id去重计数;粒度=营业所×站点。安全限制:结果行数可能较多,建议在可视化端增加筛选或分页。"},
  {"id": "snpt_total_meter_baseline", "desc": "获取全表水表去重总量基线", "type": "aggregate", "title": "水表总量基线",
   "examples": ["当前有多少只水表", "作为占比分析的分母基线"],
   "variables": [],
   "dialect_sql": {"mysql": "SELECT COUNT(DISTINCT service_point_id) AS meter_total\nFROM `data-ge`.`water_meter_info`;"},
   "applicability": {"constraints": {"notes": ["作为其他占比/分摊分母基线", "忽略service_point_id为空的记录"], "fk_join_available": false, "dim_cardinality_hint": 300}, "time_column": null, "required_columns": ["service_point_id"]},
   "business_caliber": "水表总量=按service_point_id去重计数;基于当前表的全量记录。安全限制:无时间维度,无法反映存量随时间变化。"}
]
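Unlike the gemini snippets above, the gpt5 (and qwen, below) outputs use {{variable}} placeholders rather than ${variable}, so a consumer has to handle both styles. A small sketch for the mustache-style form (function name is illustrative):

import re


def render_mustache_sql(sql: str, values: dict) -> str:
    """Replace {{name}} placeholders; raises KeyError for undeclared names."""
    return re.sub(r"\{\{\s*(\w+)\s*\}\}", lambda m: str(values[m.group(1)]), sql)

# e.g. render_mustache_sql(topn["dialect_sql"]["mysql"], {"top_n": 10})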
415 demo/水务/水务-qwen3-coder-480b-ge-desc.json Normal file
@ -0,0 +1,415 @@
{
  "role": "dimension",
  "time": {"range": null, "column": null, "has_gaps": null, "granularity": "unknown"},
  "grain": ["account_id", "service_point_id"],
  "table": "data-ge.water_meter_info",
  "columns": [
    {"name": "supply_office", "dtype": "string", "stats": {}, "comment": "供水管理所名称,枚举值", "enumish": true, "null_rate": 0.0,
     "top_values": [{"pct": null, "value": "宝山供水管理所"}, {"pct": null, "value": "黄浦供水管理所"}, {"pct": null, "value": "青东供水管理所"}, {"pct": null, "value": "虹口供水管理所"}, {"pct": null, "value": "闸北供水管理所"}, {"pct": null, "value": "松北供水管理所"}, {"pct": null, "value": "杨浦供水管理所"}, {"pct": null, "value": "长宁供水管理所"}, {"pct": null, "value": "闵行供水管理所"}, {"pct": null, "value": "徐汇供水管理所"}, {"pct": null, "value": "普陀供水管理所"}],
     "semantic_type": "dimension", "distinct_count": 11, "distinct_ratio": 0.03666666666666667, "pk_candidate_score": 0.11, "metric_candidate_score": 0.0},
    {"name": "station", "dtype": "string", "stats": {}, "comment": "站点名称,枚举值", "enumish": true, "null_rate": 0.0,
     "top_values": [{"pct": null, "value": "新闸站"}, {"pct": null, "value": "宝杨站"}, {"pct": null, "value": "江川站"}, {"pct": null, "value": "长江站"}, {"pct": null, "value": "市光站"}, {"pct": null, "value": "徐泾站"}, {"pct": null, "value": "真北站"}, {"pct": null, "value": "半淞园站"}, {"pct": null, "value": "芙蓉江站"}, {"pct": null, "value": "密云站"}],
     "semantic_type": "dimension", "distinct_count": 36, "distinct_ratio": 0.12, "pk_candidate_score": 0.36, "metric_candidate_score": 0.0},
    {"name": "district", "dtype": "string", "stats": {}, "comment": "行政区划名称,枚举值", "enumish": true, "null_rate": 0.0,
     "top_values": [{"pct": null, "value": "普陀区"}, {"pct": null, "value": "闵行区"}, {"pct": null, "value": "嘉定区"}, {"pct": null, "value": "杨浦区"}, {"pct": null, "value": "徐汇区"}, {"pct": null, "value": "黄浦区"}, {"pct": null, "value": "松江区"}, {"pct": null, "value": "长宁区"}, {"pct": null, "value": "青浦区"}, {"pct": null, "value": "虹口区"}],
     "semantic_type": "dimension", "distinct_count": 13, "distinct_ratio": 0.043333333333333335, "pk_candidate_score": 0.13, "metric_candidate_score": 0.0},
    {"name": "meter_diameter", "dtype": "string", "stats": {}, "comment": "水表直径规格,枚举值", "enumish": true, "null_rate": 0.0,
     "top_values": [{"pct": null, "value": "20mm"}, {"pct": null, "value": "15mm"}, {"pct": null, "value": "25mm"}, {"pct": null, "value": "40mm"}, {"pct": null, "value": "150mm"}, {"pct": null, "value": "100mm"}, {"pct": null, "value": "80mm"}, {"pct": null, "value": "50mm"}],
     "semantic_type": "dimension", "distinct_count": 8, "distinct_ratio": 0.02666666666666667, "pk_candidate_score": 0.08, "metric_candidate_score": 0.0},
    {"name": "meter_status", "dtype": "string", "stats": {}, "comment": "水表状态,枚举值", "enumish": true, "null_rate": 0.0,
     "top_values": [{"pct": null, "value": "有效"}],
     "semantic_type": "dimension", "distinct_count": 1, "distinct_ratio": 0.0033333333333333335, "pk_candidate_score": 0.01, "metric_candidate_score": 0.0},
    {"name": "meter_subtype", "dtype": "string", "stats": {}, "comment": "水表子类型,枚举值", "enumish": true, "null_rate": 0.0,
     "top_values": [{"pct": null, "value": "旋翼半液封式"}, {"pct": null, "value": "超声波式"}, {"pct": null, "value": "旋翼湿式(指针式)"}, {"pct": null, "value": "旋翼湿式(数字指针式)"}, {"pct": null, "value": "电磁式"}, {"pct": null, "value": "无直管段要求超声波式"}, {"pct": null, "value": "无直管段要求电磁式"}, {"pct": null, "value": "垂直螺翼干式"}, {"pct": null, "value": "机械容积式"}],
     "semantic_type": "dimension", "distinct_count": 9, "distinct_ratio": 0.03, "pk_candidate_score": 0.09, "metric_candidate_score": 0.0},
    {"name": "meter_type", "dtype": "string", "stats": {}, "comment": "水表类型,枚举值", "enumish": true, "null_rate": 0.0,
     "top_values": [{"pct": null, "value": "容积式机械水表"}, {"pct": null, "value": "速度式机械水表"}, {"pct": null, "value": "电磁式远传水表"}, {"pct": null, "value": "速度式机电远传水表"}, {"pct": null, "value": "超声波式远传水表"}],
     "semantic_type": "dimension", "distinct_count": 5, "distinct_ratio": 0.016666666666666666, "pk_candidate_score": 0.05, "metric_candidate_score": 0.0},
    {"name": "installation_position", "dtype": "string", "stats": {}, "comment": "安装位置,枚举值", "enumish": true, "null_rate": 0.0,
     "top_values": [{"pct": null, "value": "嵌墙表"}, {"pct": null, "value": "管道井表"}, {"pct": null, "value": "地下表"}, {"pct": null, "value": "龙头表"}],
     "semantic_type": "dimension", "distinct_count": 4, "distinct_ratio": 0.013333333333333334, "pk_candidate_score": 0.04, "metric_candidate_score": 0.0},
    {"name": "account_id", "dtype": "string", "stats": {}, "comment": "账户ID", "enumish": false, "null_rate": null, "top_values": [], "semantic_type": "id", "distinct_count": null, "distinct_ratio": null, "pk_candidate_score": 0.95, "metric_candidate_score": 0.0},
    {"name": "service_point_id", "dtype": "string", "stats": {}, "comment": "服务点ID", "enumish": false, "null_rate": null, "top_values": [], "semantic_type": "id", "distinct_count": null, "distinct_ratio": null, "pk_candidate_score": 0.95, "metric_candidate_score": 0.0}
  ],
  "quality": {"warning_hints": [], "failed_expectations": []},
  "row_count": 300,
  "fk_candidates": [],
  "confidence_notes": ["role判定为dimension,因所有列均为枚举或ID类型,无metric列", "grain依据account_id和service_point_id为唯一标识推测", "未发现时间列,因此time字段为null"],
  "primary_key_candidates": [["account_id"], ["service_point_id"]]
}
286 demo/水务/水务-qwen3-coder-480b-snippet-alias.json Normal file
@ -0,0 +1,286 @@
[
  {"id": "snpt_water_meter_top_supply_office",
   "aliases": [{"text": "供水所水表排行", "tone": "中性"}, {"text": "哪个供水所水表最多", "tone": "口语"}, {"text": "供水管理所水表TopN统计", "tone": "专业"}],
   "keywords": ["水表", "供水管理所", "排行", "TopN", "数量", "统计", "count", "排名", "前N", "供水所", "水表数", "维度聚合", "by_dimension", "topn"],
   "intent_tags": ["topn", "by_dimension"]},
  {"id": "snpt_water_meter_top_station",
   "aliases": [{"text": "站点水表数量排行", "tone": "中性"}, {"text": "哪个站点水表最多", "tone": "口语"}, {"text": "站点维度水表TopN分析", "tone": "专业"}],
   "keywords": ["水表", "站点", "排行", "TopN", "数量", "统计", "count", "排名", "前N", "站点数", "维度聚合", "by_dimension", "topn"],
   "intent_tags": ["topn", "by_dimension"]},
  {"id": "snpt_water_meter_top_district",
   "aliases": [{"text": "区域水表数量排名", "tone": "中性"}, {"text": "哪个区水表最多", "tone": "口语"}, {"text": "行政区水表TopN统计", "tone": "专业"}],
   "keywords": ["水表", "区域", "行政区", "排行", "TopN", "数量", "统计", "count", "排名", "前N", "区", "水表数", "维度聚合", "by_dimension", "topn"],
   "intent_tags": ["topn", "by_dimension"]},
  {"id": "snpt_water_meter_share_by_type",
   "aliases": [{"text": "水表类型占比", "tone": "中性"}, {"text": "哪种水表用得最多", "tone": "口语"}, {"text": "水表类型分布比例", "tone": "专业"}],
   "keywords": ["水表", "类型", "占比", "比例", "ratio", "分布", "meter_type", "百分比", "分类统计", "水表类型", "ratio", "aggregate", "by_dimension"],
   "intent_tags": ["ratio", "by_dimension"]},
  {"id": "snpt_water_meter_subtype_distribution",
   "aliases": [{"text": "水表子类型分布", "tone": "中性"}, {"text": "各种子类型水表情况", "tone": "口语"}, {"text": "水表子类型计数与占比", "tone": "专业"}],
   "keywords": ["水表", "子类型", "分布", "数量", "占比", "meter_subtype", "统计", "count", "百分比", "分类统计", "aggregate", "by_dimension"],
   "intent_tags": ["aggregate", "by_dimension"]},
  {"id": "snpt_water_meter_installation_position_stats",
   "aliases": [{"text": "安装位置统计", "tone": "中性"}, {"text": "哪种位置装表最多", "tone": "口语"}, {"text": "水表安装位置分布", "tone": "专业"}],
   "keywords": ["水表", "安装位置", "统计", "分布", "installation_position", "数量", "count", "位置", "安装点", "aggregate", "by_dimension"],
   "intent_tags": ["aggregate", "by_dimension"]},
  {"id": "snpt_water_meter_grain_check",
   "aliases": [{"text": "主键粒度校验", "tone": "中性"}, {"text": "数据有没有重复", "tone": "口语"}, {"text": "数据粒度一致性检查", "tone": "专业"}],
   "keywords": ["主键", "粒度", "校验", "质量", "quality", "重复", "唯一性", "account_id", "service_point_id", "数据校验", "质量检查", "异常检测"],
   "intent_tags": ["quality"]},
  {"id": "snpt_water_meter_sample_records",
   "aliases": [{"text": "水表数据抽样", "tone": "中性"}, {"text": "给我看点水表数据", "tone": "口语"}, {"text": "水表记录样本抽取", "tone": "专业"}],
   "keywords": ["水表", "样本", "抽样", "sample", "随机", "记录", "抽查", "limit", "数据结构", "数据示例", "sample", "limit_rows"],
   "intent_tags": ["sample"]}
]
235
demo/水务/水务-qwen3-coder-480b-snippet.json
Normal file
235
demo/水务/水务-qwen3-coder-480b-snippet.json
Normal file
@ -0,0 +1,235 @@
|
||||
[
  {
    "id": "snpt_water_meter_top_supply_office",
    "desc": "统计各供水管理所下辖水表数量并排序",
    "type": "topn",
    "title": "供水管理所水表数量排行",
    "examples": ["列出水表最多的前10个供水管理所", "各供水所水表数量排名"],
    "variables": [{"name": "top_n", "type": "int", "default": 10}],
    "dialect_sql": {"mysql": "SELECT supply_office AS dim_value, COUNT(*) AS metric_value FROM `data-ge.water_meter_info` GROUP BY supply_office ORDER BY metric_value DESC LIMIT {{top_n}}"},
    "applicability": {
      "constraints": {"notes": [], "fk_join_available": false, "dim_cardinality_hint": 11},
      "time_column": "nullable",
      "required_columns": ["supply_office"]
    },
    "business_caliber": "按供水管理所维度聚合水表总数,粒度=供水管理所"
  },
  {
    "id": "snpt_water_meter_top_station",
    "desc": "统计各个站点下辖水表数量并排序",
    "type": "topn",
    "title": "站点水表数量排行",
    "examples": ["列出水表最多的前10个站点", "各站点水表数量排名"],
    "variables": [{"name": "top_n", "type": "int", "default": 10}],
    "dialect_sql": {"mysql": "SELECT station AS dim_value, COUNT(*) AS metric_value FROM `data-ge.water_meter_info` GROUP BY station ORDER BY metric_value DESC LIMIT {{top_n}}"},
    "applicability": {
      "constraints": {"notes": ["高基数维度建议LIMIT<=50"], "fk_join_available": false, "dim_cardinality_hint": 36},
      "time_column": "nullable",
      "required_columns": ["station"]
    },
    "business_caliber": "按站点维度聚合水表总数,粒度=站点"
  },
  {
    "id": "snpt_water_meter_top_district",
    "desc": "统计各区水表数量并排序",
    "type": "topn",
    "title": "区域水表数量排行",
    "examples": ["列出各区水表数量排名", "哪个区的水表最多?"],
    "variables": [{"name": "top_n", "type": "int", "default": 10}],
    "dialect_sql": {"mysql": "SELECT district AS dim_value, COUNT(*) AS metric_value FROM `data-ge.water_meter_info` GROUP BY district ORDER BY metric_value DESC LIMIT {{top_n}}"},
    "applicability": {
      "constraints": {"notes": [], "fk_join_available": false, "dim_cardinality_hint": 13},
      "time_column": "nullable",
      "required_columns": ["district"]
    },
    "business_caliber": "按行政区划维度聚合水表总数,粒度=区"
  },
  {
    "id": "snpt_water_meter_share_by_type",
    "desc": "计算各类水表占总水表的比例",
    "type": "ratio",
    "title": "水表类型占比分布",
    "examples": ["各类水表占比是多少?", "哪种类型的水表使用最广泛?"],
    "variables": [],
    "dialect_sql": {"mysql": "SELECT meter_type AS dim_value, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM `data-ge.water_meter_info`) AS ratio_percent FROM `data-ge.water_meter_info` GROUP BY meter_type ORDER BY ratio_percent DESC"},
    "applicability": {
      "constraints": {"notes": [], "fk_join_available": false, "dim_cardinality_hint": 5},
      "time_column": "nullable",
      "required_columns": ["meter_type"]
    },
    "business_caliber": "按水表类型分类计算其占比,粒度=水表类型"
  },
  {
    "id": "snpt_water_meter_subtype_distribution",
    "desc": "展示不同水表子类型的数量及比例",
    "type": "aggregate",
    "title": "水表子类型分布情况",
    "examples": ["各种子类型水表的数量和占比", "哪种子类型水表最多?"],
    "variables": [],
    "dialect_sql": {"mysql": "SELECT meter_subtype AS dim_value, COUNT(*) AS count_value, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM `data-ge.water_meter_info`), 2) AS percentage FROM `data-ge.water_meter_info` GROUP BY meter_subtype ORDER BY count_value DESC"},
    "applicability": {
      "constraints": {"notes": [], "fk_join_available": false, "dim_cardinality_hint": 9},
      "time_column": "nullable",
      "required_columns": ["meter_subtype"]
    },
    "business_caliber": "按水表子类型进行计数和百分比统计,粒度=水表子类型"
  },
  {
    "id": "snpt_water_meter_installation_position_stats",
    "desc": "统计不同安装位置下的水表数量",
    "type": "aggregate",
    "title": "安装位置分布统计",
    "examples": ["各种安装位置的水表数量", "哪种安装位置最为常见?"],
    "variables": [],
    "dialect_sql": {"mysql": "SELECT installation_position AS dim_value, COUNT(*) AS count_value FROM `data-ge.water_meter_info` GROUP BY installation_position ORDER BY count_value DESC"},
    "applicability": {
      "constraints": {"notes": [], "fk_join_available": false, "dim_cardinality_hint": 4},
      "time_column": "nullable",
      "required_columns": ["installation_position"]
    },
    "business_caliber": "按安装位置对水表进行分组计数,粒度=安装位置"
  },
  {
    "id": "snpt_water_meter_grain_check",
    "desc": "验证 account_id 和 service_point_id 是否构成唯一组合",
    "type": "quality",
    "title": "主键粒度校验",
    "examples": ["这张表的数据粒度是否正确?", "是否存在重复的服务点记录?"],
    "variables": [],
    "dialect_sql": {"mysql": "SELECT IF(COUNT(*) = COUNT(DISTINCT account_id, service_point_id), 'PASS', 'FAIL') AS grain_check_result FROM `data-ge.water_meter_info`"},
    "applicability": {
      "constraints": {"notes": [], "fk_join_available": false, "dim_cardinality_hint": null},
      "time_column": "nullable",
      "required_columns": ["account_id", "service_point_id"]
    },
    "business_caliber": "检验数据是否符合预期的主键粒度(account_id + service_point_id)"
  },
  {
    "id": "snpt_water_meter_sample_records",
    "desc": "随机抽取部分水表信息用于查看结构",
    "type": "sample",
    "title": "样本抽取",
    "examples": ["给我看几条水表数据的例子", "抽查一些原始数据看看格式"],
    "variables": [{"name": "limit_rows", "type": "int", "default": 5}],
    "dialect_sql": {"mysql": "SELECT * FROM `data-ge.water_meter_info` ORDER BY RAND() LIMIT {{limit_rows}}"},
    "applicability": {
      "constraints": {"notes": [], "fk_join_available": false, "dim_cardinality_hint": null},
      "time_column": "nullable",
      "required_columns": []
    },
    "business_caliber": "从全量数据中随机采样若干条记录供参考"
  }
]
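
These snippets separate declared `variables` from `dialect_sql` bodies that carry `{{name}}` placeholders. A minimal sketch of how a caller might bind values before execution (the helper below is illustrative, not part of the repository):

```python
import re

def render_snippet_sql(snippet: dict, dialect: str = "mysql", **overrides) -> str:
    """Fill {{name}} placeholders with overrides or the declared defaults."""
    values = {v["name"]: v.get("default") for v in snippet.get("variables", [])}
    values.update(overrides)
    sql = snippet["dialect_sql"][dialect]
    # Replace each {{name}} with its bound value.
    return re.sub(r"\{\{(\w+)\}\}", lambda m: str(values[m.group(1)]), sql)

# e.g. render_snippet_sql(topn_snippet, top_n=5)
```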

1 demo/水务/水务-qwen3-next-80b-ge-desc.json Normal file
File diff suppressed because one or more lines are too long

249 demo/水务/水务-qwen3-next-80b-snippet-alias.json Normal file
@ -0,0 +1,249 @@
[
  {
    "id": "snpt_topn_supply_office_by_account",
    "aliases": [
      {"text": "哪个供水所用户最多?", "tone": "口语"},
      {"text": "按供应办公室统计账户数量", "tone": "中性"},
      {"text": "供应办公室账户数TopN排名", "tone": "专业"}
    ],
    "keywords": ["供应办公室", "账户数", "TopN", "排行", "统计", "account_id", "supply_office", "去重", "高占比", "维度分析", "by_dimension", "aggregate", "topn"],
    "intent_tags": ["topn", "aggregate", "by_dimension"]
  },
  {
    "id": "snpt_topn_station_by_account",
    "aliases": [
      {"text": "哪些站点用户最多?", "tone": "口语"},
      {"text": "按站点统计账户分布", "tone": "中性"},
      {"text": "站点账户数Top20排名", "tone": "专业"}
    ],
    "keywords": ["站点", "账户数", "TopN", "排行", "统计", "station", "account_id", "去重", "高负载", "维度分析", "by_dimension", "aggregate", "topn"],
    "intent_tags": ["topn", "aggregate", "by_dimension"]
  },
  {
    "id": "snpt_topn_district_by_account",
    "aliases": [
      {"text": "哪个区用户最多?", "tone": "口语"},
      {"text": "按行政区统计账户数量", "tone": "中性"},
      {"text": "行政区账户数全量排名", "tone": "专业"}
    ],
    "keywords": ["行政区", "账户数", "TopN", "排行", "统计", "district", "account_id", "去重", "区域对比", "维度分析", "by_dimension", "aggregate", "topn"],
    "intent_tags": ["topn", "aggregate", "by_dimension"]
  },
  {
    "id": "snpt_share_of_meter_type",
    "aliases": [
      {"text": "各类水表占多少比例?", "tone": "口语"},
      {"text": "水表类型占比分析", "tone": "中性"},
      {"text": "水表类型占比分布", "tone": "专业"}
    ],
    "keywords": ["水表类型", "占比", "比例", "meter_type", "account_id", "去重", "分布", "主流类型", "技术选型", "ratio", "aggregate", "by_dimension"],
    "intent_tags": ["ratio", "aggregate", "by_dimension"]
  },
  {
    "id": "snpt_sample_account_service_point",
    "aliases": [
      {"text": "随机看10条账户信息", "tone": "口语"},
      {"text": "抽样账户与服务点明细", "tone": "中性"},
      {"text": "账户-服务点随机抽样验证", "tone": "专业"}
    ],
    "keywords": ["抽样", "随机", "样本", "account_id", "service_point_id", "数据质量", "验证", "唯一性", "格式检查", "sample", "quality"],
    "intent_tags": ["sample", "quality"]
  },
  {
    "id": "snpt_filter_meter_status_valid",
    "aliases": [
      {"text": "只取有效的水表记录", "tone": "口语"},
      {"text": "筛选有效水表记录", "tone": "中性"},
      {"text": "水表状态有效性过滤", "tone": "专业"}
    ],
    "keywords": ["有效", "过滤", "筛选", "meter_status", "质量检查", "断言", "清洗", "filter", "quality"],
    "intent_tags": ["filter", "quality"]
  },
  {
    "id": "snpt_filter_meter_diameter_20mm",
    "aliases": [
      {"text": "找出所有20mm水表用户", "tone": "口语"},
      {"text": "筛选20mm水表记录", "tone": "中性"},
      {"text": "20mm口径水表子集提取", "tone": "专业"}
    ],
    "keywords": ["20mm", "水表直径", "过滤", "筛选", "meter_diameter", "子集", "分析", "住宅用水", "规格", "filter", "by_dimension"],
    "intent_tags": ["filter", "by_dimension"]
  }
]

227 demo/水务/水务-qwen3-next-80b-snippet.json Normal file
@ -0,0 +1,227 @@
[
  {
    "id": "snpt_topn_supply_office_by_account",
    "desc": "统计各供应办公室对应的账户数量,识别高占比管理所",
    "type": "topn",
    "title": "按供应办公室统计账户数",
    "examples": ["哪个供水管理所服务的用户最多?", "列出前5个账户数最多的供应办公室"],
    "variables": [{"name": "top_n", "type": "int", "default": 11}],
    "dialect_sql": {"mysql": "SELECT supply_office, COUNT(DISTINCT account_id) AS account_count\nFROM water_meter_info\nGROUP BY supply_office\nORDER BY account_count DESC\nLIMIT {{top_n}};"},
    "applicability": {
      "constraints": {"notes": ["供应办公室仅11个唯一值,可安全展示全部;建议LIMIT 11避免冗余排序"], "fk_join_available": false, "dim_cardinality_hint": 11},
      "time_column": "nullable",
      "required_columns": ["supply_office", "account_id"]
    },
    "business_caliber": "粒度=供应办公室,指标=去重账户数(account_id),仅统计水表信息表中有效账户,不关联外部表"
  },
  {
    "id": "snpt_topn_station_by_account",
    "desc": "统计各站点服务的账户数量,识别高负载站点",
    "type": "topn",
    "title": "按站点统计账户分布",
    "examples": ["哪些站点服务的用户最多?", "TOP10用户最多的站点是哪些?"],
    "variables": [{"name": "top_n", "type": "int", "default": 20}],
    "dialect_sql": {"mysql": "SELECT station, COUNT(DISTINCT account_id) AS account_count\nFROM water_meter_info\nGROUP BY station\nORDER BY account_count DESC\nLIMIT {{top_n}};"},
    "applicability": {
      "constraints": {"notes": ["站点有36个唯一值,建议LIMIT<=20以避免结果过长;高基数维度可能影响查询性能"], "fk_join_available": false, "dim_cardinality_hint": 36},
      "time_column": "nullable",
      "required_columns": ["station", "account_id"]
    },
    "business_caliber": "粒度=站点(station),指标=去重账户数(account_id),基于水表信息表直接聚合,不涉及时间维度"
  },
  {
    "id": "snpt_topn_district_by_account",
    "desc": "统计各行政区的账户数量,辅助区域资源分配分析",
    "type": "topn",
    "title": "按行政区统计账户分布",
    "examples": ["哪个区的用水账户最多?", "列出所有行政区的账户数量排名"],
    "variables": [{"name": "top_n", "type": "int", "default": 13}],
    "dialect_sql": {"mysql": "SELECT district, COUNT(DISTINCT account_id) AS account_count\nFROM water_meter_info\nGROUP BY district\nORDER BY account_count DESC\nLIMIT {{top_n}};"},
    "applicability": {
      "constraints": {"notes": ["行政区共13个,可完整展示;适合用于区域对比分析"], "fk_join_available": false, "dim_cardinality_hint": 13},
      "time_column": "nullable",
      "required_columns": ["district", "account_id"]
    },
    "business_caliber": "粒度=行政区(district),指标=去重账户数(account_id),基于水表信息表聚合,反映各区域用户规模"
  },
  {
    "id": "snpt_share_of_meter_type",
    "desc": "计算各类水表类型在总账户中的占比,识别主流类型",
    "type": "ratio",
    "title": "水表类型占比分析",
    "examples": ["各类水表在用户中的占比是多少?", "电磁式远传水表占总用户比例多少?"],
    "variables": [],
    "dialect_sql": {"mysql": "SELECT meter_type, \n       COUNT(DISTINCT account_id) AS account_count,\n       ROUND(COUNT(DISTINCT account_id) * 100.0 / SUM(COUNT(DISTINCT account_id)) OVER (), 2) AS percentage\nFROM water_meter_info\nGROUP BY meter_type\nORDER BY account_count DESC;"},
    "applicability": {
      "constraints": {"notes": ["水表类型仅5种,适合计算占比;可直接展示全量分布"], "fk_join_available": false, "dim_cardinality_hint": 5},
      "time_column": "nullable",
      "required_columns": ["meter_type", "account_id"]
    },
    "business_caliber": "粒度=水表类型(meter_type),指标=去重账户数占比,分母为全表去重账户总数,反映技术选型分布"
  },
  {
    "id": "snpt_sample_account_service_point",
    "desc": "随机抽取部分账户与服务点ID的原始记录,用于数据质量核查",
    "type": "sample",
    "title": "抽样账户与服务点明细",
    "examples": ["随机查看10条账户与服务点的详细信息", "抽样检查水表信息是否符合预期格式"],
    "variables": [{"name": "sample_size", "type": "int", "default": 10}],
    "dialect_sql": {"mysql": "SELECT account_id, service_point_id, supply_office, station, district, meter_diameter, meter_type, meter_subtype, installation_position\nFROM water_meter_info\nORDER BY RAND()\nLIMIT {{sample_size}};"},
    "applicability": {
      "constraints": {"notes": ["主键组合为account_id+service_point_id,适合抽样验证唯一性;建议样本量≤100"], "fk_join_available": false, "dim_cardinality_hint": null},
      "time_column": "nullable",
      "required_columns": ["account_id", "service_point_id"]
    },
    "business_caliber": "粒度=单条水表记录,抽取样本用于验证account_id与service_point_id的组合唯一性及维度字段完整性"
  },
  {
    "id": "snpt_filter_meter_status_valid",
    "desc": "过滤出水表状态为'有效'的记录,用于后续分析",
    "type": "quality",
    "title": "筛选有效水表记录",
    "examples": ["只取状态为有效的水表记录", "确认所有水表是否均为有效状态"],
    "variables": [],
    "dialect_sql": {"mysql": "SELECT *\nFROM water_meter_info\nWHERE meter_status = '有效';"},
    "applicability": {
      "constraints": {"notes": ["meter_status仅存在'有效'值,此条件恒成立;可用于数据清洗流程的显式过滤"], "fk_join_available": false, "dim_cardinality_hint": 1},
      "time_column": "nullable",
      "required_columns": ["meter_status"]
    },
    "business_caliber": "仅保留水表状态为'有效'的记录,因全表均为有效值,此过滤为冗余但可作为数据质量校验的显式断言"
  },
  {
    "id": "snpt_filter_meter_diameter_20mm",
    "desc": "筛选水表直径为20mm的记录,用于特定口径设备分析",
    "type": "quality",
    "title": "筛选20mm水表记录",
    "examples": ["找出所有使用20mm水表的用户", "20mm水表分布在哪些站点?"],
    "variables": [],
    "dialect_sql": {"mysql": "SELECT *\nFROM water_meter_info\nWHERE meter_diameter = '20mm';"},
    "applicability": {
      "constraints": {"notes": ["水表直径共8种枚举值,20mm为常见规格;可作为子集分析的起点"], "fk_join_available": false, "dim_cardinality_hint": 8},
      "time_column": "nullable",
      "required_columns": ["meter_diameter"]
    },
    "business_caliber": "粒度=单条水表记录,筛选条件为meter_diameter='20mm',用于分析标准住宅用水表的分布特征"
  }
]

49 doc/会话api.md Normal file
@ -0,0 +1,49 @@
# Create a session
curl -X POST "/api/v1/chat/sessions" \
  -H "Content-Type: application/json" \
  -d "{\"user_id\": $CHAT_USER_ID}"

# Get a session
curl "/api/v1/chat/sessions/{session_id}"

# List sessions by user
curl "/api/v1/chat/sessions?user_id=$CHAT_USER_ID"

# Update session status
curl -X POST "/api/v1/chat/sessions/{session_id}/update" \
  -H "Content-Type: application/json" \
  -d '{"status":"PAUSED"}'

# Close a session
curl -X POST "/api/v1/chat/sessions/{session_id}/close"

# Create a conversation turn
curl -X POST "/api/v1/chat/sessions/{session_id}/turns" \
  -H "Content-Type: application/json" \
  -d '{
    "user_id": '"$CHAT_USER_ID"',
    "user_query": "展示昨天订单GMV",
    "intent": "METRIC_QUERY",
    "ast_json": {"select":["gmv"],"where":{"dt":"yesterday"}},
    "main_metric_ids": [1234],
    "created_metric_ids": []
  }'

# Get a single turn
curl "/api/v1/chat/turns/{turn_id}"

# List turns in a session
curl "/api/v1/chat/sessions/{session_id}/turns"

# Write retrieval results
curl -X POST "/api/v1/chat/turns/{turn_id}/retrievals" \
  -H "Content-Type: application/json" \
  -d '{
    "retrievals": [
      {"item_type":"METRIC","item_id":"metric_foo","used_in_sql":true,"rank_no":1},
      {"item_type":"SNIPPET","item_id":"snpt_bar","similarity_score":0.77,"rank_no":2}
    ]
  }'

# List a turn's retrieval results
curl "/api/v1/chat/turns/{turn_id}/retrievals"
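
The same lifecycle can be driven from Python; a minimal sketch assuming a local deployment at http://localhost:8000 (base URL and user id are illustrative; endpoints and fields mirror the curl calls above):

```python
import httpx

BASE_URL = "http://localhost:8000"  # assumed local deployment
USER_ID = 872341                    # arbitrary test user

with httpx.Client(base_url=BASE_URL) as client:
    # Create a session, open one turn, attach a retrieval, then close.
    session = client.post("/api/v1/chat/sessions", json={"user_id": USER_ID}).json()
    sid = session["id"]
    turn = client.post(
        f"/api/v1/chat/sessions/{sid}/turns",
        json={"user_id": USER_ID, "user_query": "展示昨天订单GMV", "intent": "METRIC_QUERY"},
    ).json()
    client.post(
        f"/api/v1/chat/turns/{turn['id']}/retrievals",
        json={"retrievals": [{"item_type": "METRIC", "item_id": "metric_foo", "rank_no": 1}]},
    )
    client.post(f"/api/v1/chat/sessions/{sid}/close")
```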

69 doc/指标api.md Normal file
@ -0,0 +1,69 @@
# Create a metric
curl -X POST "/api/v1/metrics" \
  -H "Content-Type: application/json" \
  -d '{
    "metric_code": "metric_1234",
    "metric_name": "订单数",
    "biz_domain": "order",
    "biz_desc": "订单总数",
    "base_sql": "select count(*) as order_cnt from orders",
    "time_grain": "DAY",
    "dim_binding": ["dt"],
    "update_strategy": "FULL",
    "metric_aliases": ["订单量"],
    "created_by": '"$METRIC_USER_ID"'
  }'

# Update a metric
curl -X POST "/api/v1/metrics/{metric_id}" \
  -H "Content-Type: application/json" \
  -d '{"metric_name":"订单数-更新","is_active":false}'

# Get a metric
curl "/api/v1/metrics/{metric_id}"

# Create a schedule
curl -X POST "/api/v1/metric-schedules" \
  -H "Content-Type: application/json" \
  -d '{"metric_id":{metric_id},"cron_expr":"0 2 * * *","priority":5,"enabled":true}'

# Update a schedule
curl -X POST "/api/v1/metric-schedules/{schedule_id}" \
  -H "Content-Type: application/json" \
  -d '{"enabled":false,"retry_times":1}'

# List a metric's schedules
curl "/api/v1/metrics/{metric_id}/schedules"

# Trigger a run
curl -X POST "/api/v1/metric-runs/trigger" \
  -H "Content-Type: application/json" \
  -d '{
    "metric_id": {metric_id},
    "triggered_by": "API",
    "data_time_from": "2024-05-01T00:00:00Z",
    "data_time_to": "2024-05-02T00:00:00Z"
  }'

# List runs
curl "/api/v1/metric-runs?metric_id={metric_id}"

# Get a single run
curl "/api/v1/metric-runs/{run_id}"

# Write metric results
curl -X POST "/api/v1/metric-results/{metric_id}" \
  -H "Content-Type: application/json" \
  -d '{
    "metric_id": {metric_id},
    "results": [
      {"stat_time":"2024-05-01T00:00:00Z","metric_value":123.45,"data_version":"{run_id}"},
      {"stat_time":"2024-05-02T00:00:00Z","metric_value":234.56,"data_version":"{run_id}"}
    ]
  }'

# Query metric results
curl "/api/v1/metric-results?metric_id={metric_id}"

# Query the latest result
curl "/api/v1/metric-results/latest?metric_id={metric_id}"
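
End to end, a metric can be created and run in a few calls; a minimal sketch under the same assumptions as above, additionally assuming the create endpoint echoes back an `id` field:

```python
import httpx

BASE_URL = "http://localhost:8000"  # assumed local deployment

with httpx.Client(base_url=BASE_URL) as client:
    # Define a metric, then trigger one run over a fixed time window.
    metric = client.post(
        "/api/v1/metrics",
        json={
            "metric_code": "order_cnt",
            "metric_name": "订单数",
            "biz_domain": "order",
            "base_sql": "select count(*) as order_cnt from orders",
            "time_grain": "DAY",
            "dim_binding": ["dt"],
            "update_strategy": "FULL",
        },
    ).json()
    run = client.post(
        "/api/v1/metric-runs/trigger",
        json={
            "metric_id": metric["id"],      # "id" field assumed from the response
            "triggered_by": "API",
            "data_time_from": "2024-05-01T00:00:00Z",
            "data_time_to": "2024-05-02T00:00:00Z",
        },
    ).json()
    print(run)
```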

83 doc/指标生成.md Normal file
@ -0,0 +1,83 @@
A single user question → parsed into a chat_turn → the metrics/knowledge/sessions that turn retrieved (chat_turn_retrieval) →
whether a new metric was created (metric_def) →
whether a metric schedule run was triggered (metric_job_run.source_turn_id) →
which metric results were ultimately produced (metric_result.metric_id + stat_time).

Session domain
schema
Session table: chat_session
Turn table: chat_turn
Turn-retrieval association table: chat_turn_retrieval

API
1. Create a session
POST /api/v1/chat/sessions
2. Update a session
POST /api/v1/chat/sessions/{session_id}/update
3. Close a session
POST /api/v1/chat/sessions/{session_id}/close
4. Get a session
GET /api/v1/chat/sessions/{session_id}
5. List sessions (by user, time)
GET /api/v1/chat/sessions
6. Create a Q&A turn (user issues a query)
POST /api/v1/chat/sessions/{session_id}/turns
7. List all turns of a session
GET /api/v1/chat/sessions/{session_id}/turns
8. Get a single turn's details
GET /api/v1/chat/turns/{turn_id}
9. Batch-write a turn's retrieval results
POST /api/v1/chat/turns/{turn_id}/retrievals
10. List a turn's retrieval records
GET /api/v1/chat/turns/{turn_id}/retrievals
11. Update a turn's retrieval records (in future)
POST /api/v1/chat/turns/{turn_id}/retrievals/update

Metadata domain
schema
Metric definition table: metric_def

API
12. Create a metric (from Q&A or a conventional definition)
POST /api/v1/metrics
13. Update a metric
POST /api/v1/metrics/{id}
14. Get metric details
GET /api/v1/metrics/{id}

Execution and scheduling domain (tentatively Airflow)
schema
Metric schedule configuration table: metric_schedule
Schedule run record table: metric_job_run

API
1. Create a schedule configuration
POST /api/v1/metric-schedules
2. Update a schedule configuration
POST /api/v1/metric-schedules/{id}
3. Get a metric's schedule configurations
GET /api/v1/metrics/{metric_id}/schedules
4. Manually trigger one metric run (e.g. from a chat query)
POST /api/v1/metric-runs/trigger
5. List run records
GET /api/v1/metric-runs
6. Get a single run's details
GET /api/v1/metric-runs/{run_id}

Data domain
schema
Metric result table (long format): metric_result

API
1. Query metric results (by time range & dimensions)
GET /api/v1/metric-results
2. Point query (latest value)
GET /api/v1/metric-results/latest
3. Batch-write metric results
POST /api/v1/metric-results/{metric_id}
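
The lineage chain above can be walked directly over these tables; a minimal SQLAlchemy sketch, assuming the schemas under file/tableschema/ and that metric_result.data_version stores the originating run id (the connection URL is illustrative):

```python
from sqlalchemy import create_engine, text

# Illustrative connection URL; the real one comes from DATABASE_URL.
engine = create_engine("mysql+pymysql://root:12345678@127.0.0.1:3306/data-ge?charset=utf8mb4")

def trace_turn(turn_id: int) -> None:
    """Follow one turn from its retrievals to metric runs and their results."""
    with engine.connect() as conn:
        retrievals = conn.execute(
            text("SELECT item_type, item_id FROM chat_turn_retrieval WHERE turn_id = :t"),
            {"t": turn_id},
        ).all()
        runs = conn.execute(
            text("SELECT id, metric_id, status FROM metric_job_run WHERE source_turn_id = :t"),
            {"t": turn_id},
        ).all()
        for run_id, metric_id, status in runs:
            results = conn.execute(
                text("SELECT stat_time, metric_value FROM metric_result "
                     "WHERE metric_id = :m AND data_version = :v"),
                {"m": metric_id, "v": run_id},
            ).all()
            print(turn_id, retrievals, status, results)
```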

21 file/ecommerce_orders.sql Normal file
@ -0,0 +1,21 @@
CREATE TABLE `ecommerce_orders` (
  `order_id` char(36) COLLATE utf8mb4_unicode_ci NOT NULL COMMENT 'UUID from CSV',
  `customer_id` int NOT NULL,
  `product_id` int NOT NULL,
  `category` varchar(64) COLLATE utf8mb4_unicode_ci NOT NULL,
  `price` decimal(10,2) NOT NULL,
  `quantity` int NOT NULL,
  `order_date` datetime(6) NOT NULL,
  `shipping_date` datetime(6) NOT NULL,
  `delivery_status` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
  `payment_method` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
  `device_type` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
  `channel` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
  `shipping_address` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
  `billing_address` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
  `customer_segment` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
  PRIMARY KEY (`order_id`),
  KEY `idx_customer` (`customer_id`),
  KEY `idx_product` (`product_id`),
  KEY `idx_order_date` (`order_date`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

40 file/tableschema/action_results.sql Normal file
@ -0,0 +1,40 @@
CREATE TABLE `action_results` (
  `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键',
  `table_id` bigint NOT NULL COMMENT '表ID',
  `version_ts` bigint NOT NULL COMMENT '版本时间戳(版本号)',
  `action_type` enum('ge_profiling','ge_result_desc','snippet','snippet_alias') COLLATE utf8mb4_bin NOT NULL COMMENT '动作类型',
  `status` enum('pending','running','success','failed','partial') COLLATE utf8mb4_bin NOT NULL DEFAULT 'pending' COMMENT '执行状态',
  `llm_usage` json DEFAULT NULL COMMENT 'LLM token usage统计',
  `error_code` varchar(128) COLLATE utf8mb4_bin DEFAULT NULL,
  `error_message` text COLLATE utf8mb4_bin,
  `started_at` datetime DEFAULT NULL,
  `finished_at` datetime DEFAULT NULL,
  `duration_ms` int DEFAULT NULL,
  `table_schema_version_id` varchar(19) COLLATE utf8mb4_bin NOT NULL,
  `table_schema` json NOT NULL,
  `ge_profiling_json` json DEFAULT NULL COMMENT 'Profiling完整结果JSON',
  `ge_profiling_json_size_bytes` bigint DEFAULT NULL,
  `ge_profiling_summary` json DEFAULT NULL COMMENT 'Profiling摘要(剔除大value_set等)',
  `ge_profiling_summary_size_bytes` bigint DEFAULT NULL,
  `ge_profiling_total_size_bytes` bigint DEFAULT NULL COMMENT '上两者合计',
  `ge_profiling_html_report_url` varchar(1024) COLLATE utf8mb4_bin DEFAULT NULL COMMENT 'GE报告HTML路径/URL',
  `ge_result_desc_json` json DEFAULT NULL COMMENT '表描述结果JSON',
  `ge_result_desc_json_size_bytes` bigint DEFAULT NULL,
  `snippet_json` json DEFAULT NULL COMMENT 'SQL知识片段结果JSON',
  `snippet_json_size_bytes` bigint DEFAULT NULL,
  `snippet_alias_json` json DEFAULT NULL COMMENT 'SQL片段改写/丰富结果JSON',
  `snippet_alias_json_size_bytes` bigint DEFAULT NULL,
  `callback_url` varchar(1024) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
  `result_checksum` varbinary(32) DEFAULT NULL COMMENT '对当前action有效载荷计算的MD5/xxhash',
  `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `model` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT '模型名称',
  `model_provider` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT '模型渠道',
  `model_params` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT '模型参数,如温度',
  PRIMARY KEY (`id`),
  UNIQUE KEY `uq_table_ver_action` (`table_id`,`version_ts`,`action_type`),
  KEY `idx_status` (`status`),
  KEY `idx_table` (`table_id`,`updated_at`),
  KEY `idx_action_time` (`action_type`,`version_ts`),
  KEY `idx_schema_version` (`table_schema_version_id`)
) ENGINE=InnoDB AUTO_INCREMENT=113 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin ROW_FORMAT=DYNAMIC COMMENT='数据分析知识片段表';
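
The unique key `uq_table_ver_action` is what makes the pipeline's writes idempotent: retrying a step lands on the same row. A minimal sketch of such an upsert with SQLAlchemy and PyMySQL (the connection URL and values are illustrative, not the code in table_snippet.py):

```python
import json
from sqlalchemy import create_engine, text

engine = create_engine("mysql+pymysql://root:12345678@127.0.0.1:3306/data-ge?charset=utf8mb4")

UPSERT = text("""
    INSERT INTO action_results
        (table_id, version_ts, action_type, status,
         table_schema_version_id, table_schema, snippet_json, callback_url)
    VALUES
        (:table_id, :version_ts, :action_type, :status,
         :schema_version_id, :table_schema, :snippet_json, :callback_url)
    ON DUPLICATE KEY UPDATE
        status = VALUES(status),
        snippet_json = VALUES(snippet_json)
""")

with engine.begin() as conn:
    # Re-running the same (table_id, version_ts, action_type) updates in place.
    conn.execute(UPSERT, {
        "table_id": 42,
        "version_ts": 1732500000,
        "action_type": "snippet",
        "status": "success",
        "schema_version_id": "1732500000000000000",
        "table_schema": json.dumps({"columns": []}),
        "snippet_json": json.dumps([]),
        "callback_url": "http://example.com/callback",
    })
```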

103 file/tableschema/chat.sql Normal file
@ -0,0 +1,103 @@
CREATE TABLE IF NOT EXISTS chat_session (
  id BIGINT AUTO_INCREMENT PRIMARY KEY,
  user_id BIGINT NOT NULL,
  session_uuid CHAR(36) NOT NULL,             -- external-facing ID (UUID)
  end_time DATETIME NULL,
  status VARCHAR(16) NOT NULL DEFAULT 'OPEN', -- OPEN/CLOSED/ABANDONED
  last_turn_id BIGINT NULL,                   -- points to chat_turn.id
  ext_context JSON NULL,                      -- business context
  created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  UNIQUE KEY uk_session_uuid (session_uuid),
  KEY idx_user_time (user_id, created_at),
  KEY idx_status_time (status, created_at),
  KEY idx_last_turn (last_turn_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

CREATE TABLE IF NOT EXISTS chat_turn (
  id BIGINT AUTO_INCREMENT,
  session_id BIGINT NOT NULL,                 -- references chat_session.id
  turn_no INT NOT NULL,                       -- turn sequence within the session (1,2,3...)
  user_id BIGINT NOT NULL,

  user_query TEXT NOT NULL,                   -- raw user question
  intent VARCHAR(64) NULL,                    -- METRIC_QUERY/METRIC_EXPLAIN etc.
  ast_json JSON NULL,                         -- parsed AST

  generated_sql MEDIUMTEXT NULL,              -- final generated SQL
  sql_status VARCHAR(32) NULL,                -- SUCCESS/FAILED/SKIPPED
  error_msg TEXT NULL,                        -- SQL generation/execution error

  main_metric_ids JSON NULL,                  -- metric IDs involved in this turn
  created_metric_ids JSON NULL,               -- metric IDs created in this turn

  end_time DATETIME NULL,

  created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  -- Composite primary key; must include created_at for the partitioning below
  PRIMARY KEY (id, created_at),
  KEY idx_session_turn (session_id, turn_no),
  KEY idx_session_time (session_id, created_at),
  KEY idx_intent_time (intent, created_at),
  KEY idx_user_time (user_id, created_at)
)
ENGINE=InnoDB
DEFAULT CHARSET=utf8mb4
PARTITION BY RANGE COLUMNS(created_at) (
  -- historical partitions (adjust as needed)
  PARTITION p202511 VALUES LESS THAN ('2025-12-01'),
  PARTITION p202512 VALUES LESS THAN ('2026-01-01'),
  -- 2026 monthly partitions
  PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
  PARTITION p202602 VALUES LESS THAN ('2026-03-01'),
  PARTITION p202603 VALUES LESS THAN ('2026-04-01'),
  PARTITION p202604 VALUES LESS THAN ('2026-05-01'),
  PARTITION p202605 VALUES LESS THAN ('2026-06-01'),
  PARTITION p202606 VALUES LESS THAN ('2026-07-01'),
  -- ... more months can be pre-created ...

  -- catch-all partition for future data, so inserts never fail
  PARTITION p_future VALUES LESS THAN (MAXVALUE)
);

CREATE TABLE IF NOT EXISTS chat_turn_retrieval (
  id BIGINT AUTO_INCREMENT,
  turn_id BIGINT NOT NULL,                    -- references chat_turn.id

  item_type VARCHAR(32) NOT NULL,             -- METRIC/SNIPPET/CHAT
  item_id VARCHAR(128) NOT NULL,              -- metric_id/snippet_id/table_name etc.
  item_extra JSON NULL,                       -- extra info, e.g. column names

  similarity_score DECIMAL(10,6) NULL,        -- similarity
  rank_no INT NULL,                           -- retrieval rank
  used_in_reasoning TINYINT(1) NOT NULL DEFAULT 0, -- participated in reasoning
  used_in_sql TINYINT(1) NOT NULL DEFAULT 0,       -- influenced the final SQL

  created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  -- Composite primary key; must include created_at for the partitioning below
  PRIMARY KEY (id, created_at),
  KEY idx_turn (turn_id),
  KEY idx_turn_type (turn_id, item_type),
  KEY idx_item (item_type, item_id)
)
ENGINE=InnoDB
DEFAULT CHARSET=utf8mb4
PARTITION BY RANGE COLUMNS(created_at) (
  -- historical partitions (adjust as needed)
  PARTITION p202511 VALUES LESS THAN ('2025-12-01'),
  PARTITION p202512 VALUES LESS THAN ('2026-01-01'),
  -- 2026 monthly partitions
  PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
  PARTITION p202602 VALUES LESS THAN ('2026-03-01'),
  PARTITION p202603 VALUES LESS THAN ('2026-04-01'),
  PARTITION p202604 VALUES LESS THAN ('2026-05-01'),
  PARTITION p202605 VALUES LESS THAN ('2026-06-01'),
  PARTITION p202606 VALUES LESS THAN ('2026-07-01'),
  -- ... more months can be pre-created ...

  -- catch-all partition for future data, so inserts never fail
  PARTITION p_future VALUES LESS THAN (MAXVALUE)
);
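
Only a few monthly partitions are pre-created, so rows past the last boundary fall into `p_future`. To keep months separated, `p_future` can be split periodically; a minimal sketch against the chat_turn table above (the month boundary is illustrative):

```python
from sqlalchemy import create_engine, text

engine = create_engine("mysql+pymysql://root:12345678@127.0.0.1:3306/data-ge?charset=utf8mb4")

# Split the next month out of p_future so new rows keep landing in
# month-sized partitions instead of piling into the catch-all.
DDL = text("""
    ALTER TABLE chat_turn REORGANIZE PARTITION p_future INTO (
        PARTITION p202607 VALUES LESS THAN ('2026-08-01'),
        PARTITION p_future VALUES LESS THAN (MAXVALUE)
    )
""")

with engine.begin() as conn:
    conn.execute(DDL)
```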

155 file/tableschema/metrics.sql Normal file
@ -0,0 +1,155 @@
CREATE TABLE metric_def (
  id BIGINT AUTO_INCREMENT PRIMARY KEY,

  metric_code VARCHAR(64) NOT NULL,        -- internal code: order_cnt_delivery
  metric_name VARCHAR(128) NOT NULL,       -- display name: 外送订单数
  metric_aliases JSON NULL,                -- alias list

  biz_domain VARCHAR(64) NOT NULL,         -- derived from table tags, manual override supported
  biz_desc TEXT NULL,                      -- business-caliber description

  chat_turn_id BIGINT NULL,                -- originating chat turn

  tech_desc TEXT NULL,                     -- technical-caliber description
  formula_expr TEXT NULL,                  -- formula, e.g. "sum(pay_amount)"
  base_sql MEDIUMTEXT NOT NULL,            -- canonical computation SQL (logical SQL/snippet)

  time_grain VARCHAR(32) NOT NULL,         -- DAY/HOUR/WEEK/MONTH
  dim_binding JSON NOT NULL,               -- bound dimensions, e.g. ["dt","store_id","channel"]

  update_strategy VARCHAR(32) NOT NULL,    -- FULL/INCR/REALTIME
  schedule_id BIGINT NULL,                 -- schedule ID
  schedule_type INT NULL,                  -- schedule type, defaults to cron

  version INT NOT NULL DEFAULT 1,
  is_active TINYINT(1) NOT NULL DEFAULT 1,

  sql_hash VARCHAR(64) NULL,               -- hash of base_sql, used for version comparison
  created_by BIGINT NULL,
  updated_by BIGINT NULL,

  created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,

  UNIQUE KEY uk_metric_code (metric_code),
  KEY idx_domain_active (biz_domain, is_active),
  KEY idx_update_strategy (update_strategy),
  KEY idx_name (metric_name)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

CREATE TABLE metric_schedule (
  id BIGINT AUTO_INCREMENT PRIMARY KEY,
  metric_id BIGINT NOT NULL,               -- references metric_def.id

  cron_expr VARCHAR(64) NOT NULL,          -- cron expression
  enabled TINYINT(1) NOT NULL DEFAULT 1,   -- enabled flag
  priority INT NOT NULL DEFAULT 10,        -- priority

  backfill_allowed TINYINT(1) NOT NULL DEFAULT 1, -- backfill allowed
  max_runtime_sec INT NULL,                -- max runtime (seconds)
  retry_times INT NOT NULL DEFAULT 0,      -- retries on failure

  owner_team VARCHAR(64) NULL,
  owner_user_id BIGINT NULL,

  created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,

  KEY idx_metric_enabled (metric_id, enabled),
  KEY idx_owner (owner_team, owner_user_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

CREATE TABLE metric_job_run (
  id BIGINT AUTO_INCREMENT,

  metric_id BIGINT NOT NULL,               -- metric_def.id
  schedule_id BIGINT NULL,                 -- metric_schedule.id, NULL for manual triggers
  source_turn_id BIGINT NULL,              -- chat_turn.id when the run was triggered by a Q&A turn

  data_time_from DATETIME NULL,            -- stat window start
  data_time_to DATETIME NULL,              -- stat window end

  metric_version INT NOT NULL,             -- metric version used for this run
  base_sql_snapshot MEDIUMTEXT NOT NULL,   -- SQL snapshot used for this run

  status VARCHAR(32) NOT NULL,             -- RUNNING/SUCCESS/FAILED/SKIPPED
  error_msg TEXT NULL,

  affected_rows BIGINT NULL,               -- rows written
  runtime_ms BIGINT NULL,                  -- execution time

  triggered_by VARCHAR(32) NOT NULL,       -- SCHEDULER/MANUAL/API/QA_TURN
  triggered_at DATETIME NOT NULL,
  started_at DATETIME NULL,
  finished_at DATETIME NULL,

  created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  -- Composite primary key; must include created_at for the partitioning below
  PRIMARY KEY (id, created_at),
  KEY idx_metric_time (metric_id, data_time_from, data_time_to),
  KEY idx_status_time (status, triggered_at),
  KEY idx_schedule (schedule_id),
  KEY idx_source_turn (source_turn_id)
)
ENGINE=InnoDB
DEFAULT CHARSET=utf8mb4
PARTITION BY RANGE COLUMNS(created_at) (
  -- historical partitions (adjust as needed)
  PARTITION p202511 VALUES LESS THAN ('2025-12-01'),
  PARTITION p202512 VALUES LESS THAN ('2026-01-01'),
  -- 2026 monthly partitions
  PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
  PARTITION p202602 VALUES LESS THAN ('2026-03-01'),
  PARTITION p202603 VALUES LESS THAN ('2026-04-01'),
  PARTITION p202604 VALUES LESS THAN ('2026-05-01'),
  PARTITION p202605 VALUES LESS THAN ('2026-06-01'),
  PARTITION p202606 VALUES LESS THAN ('2026-07-01'),
  -- ... more months can be pre-created ...

  -- catch-all partition for future data, so inserts never fail
  PARTITION p_future VALUES LESS THAN (MAXVALUE)
);

CREATE TABLE metric_result (
  id BIGINT AUTO_INCREMENT,

  metric_id BIGINT NOT NULL,               -- metric_def.id
  metric_version INT NOT NULL,             -- metric_def.version
  stat_time DATETIME NOT NULL,             -- time aligned to time_grain

  extra_dims JSON NULL,                    -- other dimensions, stored as JSON

  metric_value DECIMAL(32,8) NOT NULL,     -- metric value

  load_time DATETIME NOT NULL,             -- load timestamp
  data_version BIGINT NULL,                -- version or job_run id

  created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  -- Composite primary key; must include created_at for the partitioning below
  PRIMARY KEY (id, created_at),
  KEY idx_metric_time (metric_id, stat_time),
  KEY idx_load_time (load_time)
)
ENGINE=InnoDB
DEFAULT CHARSET=utf8mb4
PARTITION BY RANGE COLUMNS(created_at) (
  -- historical partitions (adjust as needed)
  PARTITION p202511 VALUES LESS THAN ('2025-12-01'),
  PARTITION p202512 VALUES LESS THAN ('2026-01-01'),
  -- 2026 monthly partitions
  PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
  PARTITION p202602 VALUES LESS THAN ('2026-03-01'),
  PARTITION p202603 VALUES LESS THAN ('2026-04-01'),
  PARTITION p202604 VALUES LESS THAN ('2026-05-01'),
  PARTITION p202605 VALUES LESS THAN ('2026-06-01'),
  PARTITION p202606 VALUES LESS THAN ('2026-07-01'),
  -- ... more months can be pre-created ...

  -- catch-all partition for future data, so inserts never fail
  PARTITION p_future VALUES LESS THAN (MAXVALUE)
);
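
`sql_hash` lets two definition versions be compared without diffing `base_sql` itself. A sketch of how it might be computed (the normalization step is an assumption; any stable 64-character digest fits the column):

```python
import hashlib

def sql_hash(base_sql: str) -> str:
    """Stable 64-char fingerprint of base_sql (fits sql_hash VARCHAR(64))."""
    normalized = " ".join(base_sql.split()).lower()  # collapse whitespace, case-fold
    return hashlib.sha256(normalized.encode("utf-8")).hexdigest()

# Recompute on update; bump metric_def.version only when the hash changes.
```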

table_snippet.sql
@ -4,6 +4,9 @@ CREATE TABLE `action_results` (
  `version_ts` bigint NOT NULL COMMENT '版本时间戳(版本号)',
  `action_type` enum('ge_profiling','ge_result_desc','snippet','snippet_alias') COLLATE utf8mb4_bin NOT NULL COMMENT '动作类型',
  `status` enum('pending','running','success','failed','partial') COLLATE utf8mb4_bin NOT NULL DEFAULT 'pending' COMMENT '执行状态',
  `model` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT '模型名称',
  `model_provider` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT '模型渠道',
  `model_params` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT '模型参数,如温度',
  `llm_usage` json DEFAULT NULL COMMENT 'LLM token usage统计',
  `error_code` varchar(128) COLLATE utf8mb4_bin DEFAULT NULL,
  `error_message` text COLLATE utf8mb4_bin,

6 main.py Normal file
@ -0,0 +1,6 @@
def main():
    print("Hello from data-ge-new!")


if __name__ == "__main__":
    main()

23 project.md Normal file
@ -0,0 +1,23 @@
Project structure and logic

app/main.py: creates the FastAPI app and its lifespan, initializes the shared httpx.AsyncClient and LLMGateway, applies unified exception handling, and exposes four endpoints: chat proxy, import analysis, table profiling pipeline, and table snippet persistence.
app/models.py: defines all request/response models and enums (LLM requests, import analysis jobs, table profiling jobs, snippet persistence, etc.), with field validation and defaults.
app/services: core business logic
gateway.py forwards /v1/chat/completions requests to NEW_API_BASE_URL (with an optional Bearer token) and normalizes the response.
import_analysis.py assembles the import prompt (prompt/data_import_analysis.md), parses/truncates samples, calls the unified chat endpoint, extracts the JSON result and token usage, and finally calls back the business side.
table_profiling.py runs a 4-step pipeline in sequence: Great Expectations profiling → LLM result description (prompt/ge_result_desc_prompt.md) → snippet generation (prompt/snippet_generator.md) → snippet aliases (prompt/snippet_alias_generator.md), with a status/result callback after every step.
table_snippet.py upserts each step's result into the database table, automatically serializing JSON/size info and building INSERT ... ON DUPLICATE KEY UPDATE statements.
app/providers/*: direct clients for each vendor (OpenAI/Anthropic/OpenRouter/Gemini/Qwen/DeepSeek) implementing the unified chat interface; the main flow now forwards through new-api, but direct access is retained.
prompt/ holds prompt templates; scripts/ and test/ provide API call examples and regression samples; table_snippet.sql contains the action_results table schema (used to persist snippets and profiling results).

Features and requirements

LLM gateway: POST /v1/chat/completions accepts an LLMRequest (provider + model + messages, etc.) and passes the payload through to NEW_API_BASE_URL/v1/chat/completions with optional NEW_API_AUTH_TOKEN authentication; on failure it returns 4xx/5xx and logs the raw response.
Import analysis (async): POST /v1/import/analyze accepts import samples (rows/headers/raw_csv/table_schema), a target model llm_model (defaulting to DEFAULT_IMPORT_MODEL, optionally restricted by the IMPORT_SUPPORTED_MODELS whitelist), a temperature, and a callback URL. The service converts the sample to CSV, attaches the schema, concatenates system and user messages, calls the unified chat endpoint, parses the JSON in the first choice as the analysis result, and sends it back via callback together with LLM usage; on failure it calls back with status=failed and the error message.
Table profiling pipeline (async): POST /v1/table/profiling accepts a table identifier, version, callback URL, and GE/LLM configuration (datasource/batch_request, connection string template, LLM model and timeout). The pipeline runs in order:
Great Expectations profiling (profiler type, datasource, and runtime SQL query/table are configurable), producing full and summary JSON plus the Data Docs path;
a chat call that generates the GE result description JSON;
snippet array generation based on the description;
snippet alias/keyword generation.
Each step's success or failure triggers a callback whose payload includes action_type, the result JSON, the model, llm_usage, error information, and so on.
Snippet persistence: POST /v1/table/snippet accepts a TableSnippetUpsertRequest (table/version, action type, status, schema, model info, per-stage JSON and sizes, error codes, timestamps, etc.), maps it into the action_results table as an UPSERT, and returns whether an existing record was updated.
Configuration and runtime: core environment variables live in app/settings.py (API keys, DEFAULT_IMPORT_MODEL, IMPORT_GATEWAY_BASE_URL/NEW_API_BASE_URL, model whitelist, database URL, etc.); logging uses logging.yaml and auto-creates logs/; HTTP client timeout/proxy are controlled via HTTP_CLIENT_TIMEOUT, HTTP_CLIENT_TRUST_ENV, and HTTP_CLIENT_PROXY. For debugging run uvicorn app.main:app --reload; Docker support is provided by Dockerfile/docker-compose.yml.
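
The gateway's pass-through is the simplest piece of the above; a minimal sketch of the idea, assuming the environment variables named earlier (this is an illustration, not the actual gateway.py):

```python
import os

import httpx

NEW_API_BASE_URL = os.getenv("NEW_API_BASE_URL", "http://localhost:3000")
NEW_API_AUTH_TOKEN = os.getenv("NEW_API_AUTH_TOKEN")

async def forward_chat_completion(client: httpx.AsyncClient, payload: dict) -> dict:
    """Pass an OpenAI-style chat payload through to new-api and return its JSON."""
    headers = {}
    if NEW_API_AUTH_TOKEN:
        headers["Authorization"] = f"Bearer {NEW_API_AUTH_TOKEN}"
    resp = await client.post(
        f"{NEW_API_BASE_URL}/v1/chat/completions",
        json=payload,
        headers=headers,
    )
    resp.raise_for_status()  # surface 4xx/5xx to the caller
    return resp.json()
```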

7 pyproject.toml Normal file
@ -0,0 +1,7 @@
[project]
name = "data-ge-new"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.11"
dependencies = []

requirements.txt
@ -1,13 +0,0 @@
fastapi>=0.111.0
uvicorn[standard]>=0.29.0
pydantic>=2.6.0
sqlalchemy>=2.0.28
pymysql>=1.1.0
great_expectations>=0.18.0,<0.19.0
pandas>=2.0
numpy>=1.24
openpyxl>=3.1
httpx==0.27.2
python-dotenv==1.0.1
requests>=2.31.0
PyYAML>=6.0.1

142 test/test_chat_api_mysql.py Normal file
@ -0,0 +1,142 @@
from __future__ import annotations

import os
import random
import sys
from pathlib import Path
from typing import Generator

import pytest
from fastapi.testclient import TestClient
from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError

# Ensure the project root is importable when running directly via python.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from app import db
from app.main import create_app


TEST_USER_ID = 872341
SCHEMA_PATH = Path("file/tableschema/chat.sql")
DEFAULT_MYSQL_URL = "mysql+pymysql://root:12345678@127.0.0.1:3306/data-ge?charset=utf8mb4"


@pytest.fixture(scope="module")
def client() -> Generator[TestClient, None, None]:
    mysql_url = os.getenv("TEST_DATABASE_URL", DEFAULT_MYSQL_URL)
    os.environ["DATABASE_URL"] = mysql_url
    db.get_engine.cache_clear()
    engine = db.get_engine()
    try:
        # Quick connectivity check
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))
    except SQLAlchemyError:
        pytest.skip(f"Cannot connect to MySQL at {mysql_url}")

    # _ensure_chat_schema(engine)

    app = create_app()
    with TestClient(app) as test_client:
        yield test_client

    # cleanup test artifacts
    with engine.begin() as conn:
        # remove retrievals and turns tied to test sessions
        conn.execute(
            text(
                """
                DELETE FROM chat_turn_retrieval
                WHERE turn_id IN (
                    SELECT id FROM chat_turn WHERE session_id IN (SELECT id FROM chat_session WHERE user_id=:uid)
                )
                """
            ),
            {"uid": TEST_USER_ID},
        )
        conn.execute(
            text("DELETE FROM chat_turn WHERE session_id IN (SELECT id FROM chat_session WHERE user_id=:uid)"),
            {"uid": TEST_USER_ID},
        )
        conn.execute(text("DELETE FROM chat_session WHERE user_id=:uid"), {"uid": TEST_USER_ID})
    db.get_engine.cache_clear()


def test_session_lifecycle_mysql(client: TestClient) -> None:
    # Create a session
    resp = client.post("/api/v1/chat/sessions", json={"user_id": TEST_USER_ID})
    assert resp.status_code == 200, resp.text
    session = resp.json()
    session_id = session["id"]
    assert session["status"] == "OPEN"

    # Get session
    assert client.get(f"/api/v1/chat/sessions/{session_id}").status_code == 200

    # List sessions (filter by user)
    resp = client.get("/api/v1/chat/sessions", params={"user_id": TEST_USER_ID})
    assert resp.status_code == 200
    assert any(item["id"] == session_id for item in resp.json())

    # Update status
    resp = client.post(f"/api/v1/chat/sessions/{session_id}/update", json={"status": "PAUSED"})
    assert resp.status_code == 200
    assert resp.json()["status"] == "PAUSED"

    # Close session
    resp = client.post(f"/api/v1/chat/sessions/{session_id}/close")
    assert resp.status_code == 200
    assert resp.json()["status"] == "CLOSED"


def test_turns_and_retrievals_mysql(client: TestClient) -> None:
    session_id = client.post("/api/v1/chat/sessions", json={"user_id": TEST_USER_ID}).json()["id"]
    turn_payload = {
        "user_id": TEST_USER_ID,
        "user_query": "展示昨天订单GMV",
        "intent": "METRIC_QUERY",
        "ast_json": {"select": ["gmv"], "where": {"dt": "yesterday"}},
        "main_metric_ids": [random.randint(1000, 9999)],
        "created_metric_ids": [],
    }
    resp = client.post(f"/api/v1/chat/sessions/{session_id}/turns", json=turn_payload)
    assert resp.status_code == 200, resp.text
    turn = resp.json()
    turn_id = turn["id"]
    assert turn["turn_no"] == 1

    # Fetch turn
    assert client.get(f"/api/v1/chat/turns/{turn_id}").status_code == 200

    # List turns under session
    resp = client.get(f"/api/v1/chat/sessions/{session_id}/turns")
    assert resp.status_code == 200
    assert any(t["id"] == turn_id for t in resp.json())

    # Insert retrievals
    retrievals_payload = {
        "retrievals": [
            {"item_type": "METRIC", "item_id": "metric_foo", "used_in_sql": True, "rank_no": 1},
            {"item_type": "SNIPPET", "item_id": "snpt_bar", "similarity_score": 0.77, "rank_no": 2},
        ]
    }
    resp = client.post(f"/api/v1/chat/turns/{turn_id}/retrievals", json=retrievals_payload)
    assert resp.status_code == 200
    assert resp.json()["inserted"] == 2

    # List retrievals
    resp = client.get(f"/api/v1/chat/turns/{turn_id}/retrievals")
    assert resp.status_code == 200
    items = resp.json()
    assert len(items) == 2
    assert {item["item_type"] for item in items} == {"METRIC", "SNIPPET"}


if __name__ == "__main__":
    import pytest as _pytest

    raise SystemExit(_pytest.main([__file__]))

91 test/test_rag_client.py Normal file
@ -0,0 +1,91 @@
from __future__ import annotations

import json

import httpx
import pytest

from app.exceptions import ProviderAPICallError
from app.schemas.rag import RagDeleteRequest, RagItemPayload, RagRetrieveRequest
from app.services.rag_client import RagAPIClient


@pytest.mark.asyncio
async def test_add_sends_payload_and_headers() -> None:
    rag_client = RagAPIClient(base_url="http://rag.test", auth_token="secret-token")

    def handler(request: httpx.Request) -> httpx.Response:
        assert request.method == "POST"
        assert str(request.url) == "http://rag.test/rag/add"
        assert request.headers["Authorization"] == "Bearer secret-token"
        payload = json.loads(request.content.decode())
        assert payload == {
            "id": 1,
            "workspaceId": 2,
            "name": "demo",
            "embeddingData": "vector",
            "type": "METRIC",
        }
        return httpx.Response(200, json={"ok": True, "echo": payload})

    transport = httpx.MockTransport(handler)
    async with httpx.AsyncClient(transport=transport) as client:
        result = await rag_client.add(
            client,
            RagItemPayload(id=1, workspaceId=2, name="demo", embeddingData="vector", type="METRIC"),
        )
    assert result["ok"] is True
    assert result["echo"]["name"] == "demo"


@pytest.mark.asyncio
async def test_add_batch_serializes_list() -> None:
    rag_client = RagAPIClient(base_url="http://rag.test", auth_token=None)

    def handler(request: httpx.Request) -> httpx.Response:
        payload = json.loads(request.content.decode())
        assert request.url.path == "/rag/addBatch"
        assert isinstance(payload, list) and len(payload) == 2
        return httpx.Response(200, json={"received": len(payload)})

    items = [
        RagItemPayload(id=1, workspaceId=2, name="a", embeddingData="vec-a", type="METRIC"),
        RagItemPayload(id=2, workspaceId=2, name="b", embeddingData="vec-b", type="METRIC"),
    ]
    transport = httpx.MockTransport(handler)
    async with httpx.AsyncClient(transport=transport) as client:
        result = await rag_client.add_batch(client, items)
    assert result == {"received": 2}


@pytest.mark.asyncio
async def test_http_error_raises_provider_error() -> None:
    rag_client = RagAPIClient(base_url="http://rag.test")

    def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(500, text="boom")

    transport = httpx.MockTransport(handler)
    async with httpx.AsyncClient(transport=transport) as client:
        with pytest.raises(ProviderAPICallError) as excinfo:
            await rag_client.delete(client, RagDeleteRequest(id=1, type="METRIC"))

    err = excinfo.value
    assert err.status_code == 500
    assert "boom" in (err.response_text or "")


@pytest.mark.asyncio
async def test_non_json_response_returns_raw_text() -> None:
    rag_client = RagAPIClient(base_url="http://rag.test")

    def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, text="plain-text-body")

    transport = httpx.MockTransport(handler)
    async with httpx.AsyncClient(transport=transport) as client:
        result = await rag_client.retrieve(
            client, RagRetrieveRequest(query="foo", num=1, workspaceId=1, type="METRIC")
        )
    assert result == {"raw": "plain-text-body"}

13 uv.lock generated Normal file
@ -0,0 +1,13 @@
version = 1
revision = 1
requires-python = ">=3.11"
resolution-markers = [
    "python_full_version >= '3.14'",
    "python_full_version >= '3.12' and python_full_version < '3.14'",
    "python_full_version < '3.12'",
]

[[package]]
name = "data-ge-new"
version = "0.1.0"
source = { virtual = "." }