Compare commits

1 commit

| Author | SHA1 | Date |
|---|---|---|
| | 82fe7b93b5 | |
.env (7 changed lines)

@@ -16,11 +16,8 @@ DEFAULT_IMPORT_MODEL=deepseek:deepseek-chat
 # Service configuration
 IMPORT_GATEWAY_BASE_URL=http://localhost:8000
-
-# prod nbackend base url
-NBACKEND_BASE_URL=https://chatbi.agentcarrier.cn/chatbi/api
 
 # HTTP client configuration
-HTTP_CLIENT_TIMEOUT=120
+HTTP_CLIENT_TIMEOUT=30
 HTTP_CLIENT_TRUST_ENV=false
 # HTTP_CLIENT_PROXY=
 
@@ -30,5 +27,3 @@ IMPORT_CHAT_TIMEOUT_SECONDS=120
 # Logging
 LOG_LEVEL=INFO
 # LOG_FORMAT=%(asctime)s %(levelname)s %(name)s:%(lineno)d %(message)s
-NEW_API_BASE_URL=http://localhost:3000
-NEW_API_AUTH_TOKEN="sk-Q79KGFJRs5Vk9HsfFqoiJk948uLMDhAVe037AeCb31URyWGL"
.gitignore (vendored, 2 changed lines)

@@ -4,5 +4,3 @@ gx/uncommitted/
 **/__pycache__/
 *.pyc
 .DS_Store
-gx/
-logs/
Dockerfile (17 changed lines, file deleted)

@@ -1,17 +0,0 @@
-FROM python:3.11-slim
-
-# Configure pip to use a domestic (Tsinghua) mirror globally
-ENV PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
-ENV PIP_TRUSTED_HOST=pypi.tuna.tsinghua.edu.cn
-
-WORKDIR /app
-
-COPY requirements.txt .
-RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r requirements.txt
-
-COPY . .
-
-EXPOSE 8000
-
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
README.md

@@ -2,7 +2,7 @@
 
 This project exposes a FastAPI-based microservice that provides:
 
-- A unified chat completions gateway that now forwards requests to the internal `new-api` service (default `http://localhost:3000`) while preserving the same client-facing schema.
+- A unified chat completions gateway supporting multiple LLM providers (OpenAI, Anthropic, OpenRouter, Gemini, Qwen, DeepSeek, etc.)
 - An asynchronous data import analysis pipeline that orchestrates LLM calls to produce structured metadata and processing recommendations
 
 The following instructions cover environment setup, dependency installation, and running the backend service.

@@ -56,7 +56,6 @@ Copy `.env.example` to `.env` (if provided) or edit `.env` to supply API keys an
 - `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `OPENROUTER_API_KEY`, etc.
 - `HTTP_CLIENT_TIMEOUT`, `IMPORT_CHAT_TIMEOUT_SECONDS`
 - `LOG_LEVEL`, `LOG_FORMAT` for logging
-- `NEW_API_BASE_URL` (defaults to `http://localhost:3000`) and optional `NEW_API_AUTH_TOKEN` if the new-api component enforces authentication.
 
 
 ## Run the Backend Service

@@ -78,9 +77,6 @@ nohup uvicorn app.main:app --host 0.0.0.0 --port 8000 > server.log 2>&1 &
 
 Or use a process manager such as `pm2`, `supervisor`, or systemd for production deployments.
 
 ## API List
 1. Import analysis schema endpoint: http://localhost:8000/v1/import/analyze
 
 ## Additional Commands
 
 - Run the data import analysis example: `python test/data_import_analysis_example.py`
 
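For a quick smoke test of the gateway once the service is up, a call like the following should work; the exact request schema is defined by `LLMRequest` in `app/models.py`, so the field names below (`provider`, `model`, `messages`) are a best-effort reading of this diff, not documented API:

```python
# Hedged example request against the locally running gateway.
import httpx

resp = httpx.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "provider": "deepseek",          # assumed field name
        "model": "deepseek-chat",
        "messages": [{"role": "user", "content": "hello"}],
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```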
app/db.py (26 changed lines, file deleted)

@@ -1,26 +0,0 @@
-from __future__ import annotations
-
-import os
-from functools import lru_cache
-
-from sqlalchemy import create_engine
-from sqlalchemy.engine import Engine
-
-
-@lru_cache(maxsize=1)
-def get_engine() -> Engine:
-    """Return a cached SQLAlchemy engine configured from DATABASE_URL."""
-    database_url = os.getenv(
-        "DATABASE_URL",
-        "mysql+pymysql://root:12345678@localhost:3306/data-ge?charset=utf8mb4",
-    )
-    connect_args = {}
-    if database_url.startswith("sqlite"):
-        connect_args["check_same_thread"] = False
-
-    return create_engine(
-        database_url,
-        pool_pre_ping=True,
-        future=True,
-        connect_args=connect_args,
-    )
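For context on the deleted helper: `@lru_cache(maxsize=1)` turns `get_engine()` into a process-wide singleton factory. A minimal usage sketch, assuming a reachable `DATABASE_URL` (sqlite and the table below are illustrative, not the project's MySQL schema):

```python
# Minimal usage sketch of the deleted get_engine() helper.
import os

os.environ["DATABASE_URL"] = "sqlite:///./example.db"

from sqlalchemy import text

from app.db import get_engine  # module removed by this commit

engine = get_engine()           # first call builds the engine
assert get_engine() is engine   # lru_cache(maxsize=1) hands back the same instance

with engine.begin() as conn:
    conn.execute(text("CREATE TABLE IF NOT EXISTS t (id INTEGER PRIMARY KEY)"))
```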
app/main.py (178 changed lines)

@@ -2,65 +2,43 @@ from __future__ import annotations
 
 import asyncio
 import logging
-import logging.config
 import os
 from contextlib import asynccontextmanager
 from typing import Any
 
-import yaml
-
 import httpx
 from fastapi import Depends, FastAPI, HTTPException, Request
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse
 
 from app.exceptions import ProviderAPICallError, ProviderConfigurationError
 from app.models import (
-    ActionStatus,
-    ActionType,
     DataImportAnalysisJobAck,
     DataImportAnalysisJobRequest,
     LLMRequest,
     LLMResponse,
-    TableProfilingJobAck,
-    TableProfilingJobRequest,
-    TableSnippetRagIngestRequest,
-    TableSnippetRagIngestResponse,
-    TableSnippetUpsertRequest,
-    TableSnippetUpsertResponse,
 )
-from app.routers import chat_router, metrics_router
 from app.services import LLMGateway
 from app.services.import_analysis import process_import_analysis_job
-from app.services.table_profiling import process_table_profiling_job
-from app.services.table_snippet import ingest_snippet_rag_from_db, upsert_action_result
 
 
-def _ensure_log_directories(config: dict[str, Any]) -> None:
-    handlers = config.get("handlers", {})
-    for handler_config in handlers.values():
-        filename = handler_config.get("filename")
-        if not filename:
-            continue
-        directory = os.path.dirname(filename)
-        if directory and not os.path.exists(directory):
-            os.makedirs(directory, exist_ok=True)
-
-
 def _configure_logging() -> None:
-    config_path = os.getenv("LOGGING_CONFIG", "logging.yaml")
-    if os.path.exists(config_path):
-        with open(config_path, "r", encoding="utf-8") as fh:
-            config = yaml.safe_load(fh)
-        if isinstance(config, dict):
-            _ensure_log_directories(config)
-            logging.config.dictConfig(config)
-            return
-    logging.basicConfig(
-        level=logging.INFO,
-        format="%(asctime)s %(levelname)s %(name)s:%(lineno)d %(message)s",
+    level_name = os.getenv("LOG_LEVEL", "INFO").upper()
+    level = getattr(logging, level_name, logging.INFO)
+    log_format = os.getenv(
+        "LOG_FORMAT",
+        "%(asctime)s %(levelname)s %(name)s:%(lineno)d %(message)s",
     )
 
+    root = logging.getLogger()
+
+    if not root.handlers:
+        logging.basicConfig(level=level, format=log_format)
+    else:
+        root.setLevel(level)
+        formatter = logging.Formatter(log_format)
+        for handler in root.handlers:
+            handler.setLevel(level)
+            handler.setFormatter(formatter)
 
 
 _configure_logging()
 logger = logging.getLogger(__name__)
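The replacement `_configure_logging` drives everything from `LOG_LEVEL`/`LOG_FORMAT` instead of a YAML file. A stripped-down sketch of the same idea, with illustrative values rather than project code:

```python
# Env-driven logging configuration in miniature.
import logging
import os

os.environ.setdefault("LOG_LEVEL", "debug")

# Unknown level names fall back to INFO, matching the getattr() pattern above.
level = getattr(logging, os.environ["LOG_LEVEL"].upper(), logging.INFO)
logging.basicConfig(
    level=level,
    format="%(asctime)s %(levelname)s %(name)s:%(lineno)d %(message)s",
)
logging.getLogger(__name__).debug("visible because LOG_LEVEL=debug")
```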
@@ -140,27 +118,6 @@ def create_app() -> FastAPI:
         version="0.1.0",
         lifespan=lifespan,
     )
-    # Chat/metric management APIs
-    application.include_router(chat_router)
-    application.include_router(metrics_router)
 
-    @application.exception_handler(RequestValidationError)
-    async def request_validation_exception_handler(
-        request: Request, exc: RequestValidationError
-    ) -> JSONResponse:
-        try:
-            raw_body = await request.body()
-        except Exception:  # pragma: no cover - defensive
-            raw_body = b"<unavailable>"
-        truncated_body = raw_body[:4096]
-        logger.warning(
-            "Validation error on %s %s: %s | body preview=%s",
-            request.method,
-            request.url.path,
-            exc.errors(),
-            truncated_body.decode("utf-8", errors="ignore"),
-        )
-        return JSONResponse(status_code=422, content={"detail": exc.errors()})
-
     @application.post(
         "/v1/chat/completions",
@@ -207,109 +164,6 @@ def create_app() -> FastAPI:
 
         return DataImportAnalysisJobAck(import_record_id=payload.import_record_id, status="accepted")
 
-    @application.post(
-        "/v1/table/profiling",
-        response_model=TableProfilingJobAck,
-        summary="Run end-to-end GE profiling pipeline and notify via callback per action",
-        status_code=202,
-    )
-    async def run_table_profiling(
-        payload: TableProfilingJobRequest,
-        gateway: LLMGateway = Depends(get_gateway),
-        client: httpx.AsyncClient = Depends(get_http_client),
-    ) -> TableProfilingJobAck:
-        request_copy = payload.model_copy(deep=True)
-
-        async def _runner() -> None:
-            await process_table_profiling_job(request_copy, gateway, client)
-
-        asyncio.create_task(_runner())
-
-        return TableProfilingJobAck(
-            table_id=payload.table_id,
-            version_ts=payload.version_ts,
-            status="accepted",
-        )
-
-    @application.post(
-        "/v1/table/snippet",
-        response_model=TableSnippetUpsertResponse,
-        summary="Persist or update action results, such as table snippets.",
-    )
-    async def upsert_table_snippet(
-        payload: TableSnippetUpsertRequest,
-        client: httpx.AsyncClient = Depends(get_http_client),
-    ) -> TableSnippetUpsertResponse:
-        request_copy = payload.model_copy(deep=True)
-
-        try:
-            response = await asyncio.to_thread(upsert_action_result, request_copy)
-        except Exception as exc:
-            logger.error(
-                "Failed to upsert table snippet: table_id=%s version_ts=%s action_type=%s",
-                payload.table_id,
-                payload.version_ts,
-                payload.action_type,
-                exc_info=True,
-            )
-            raise HTTPException(status_code=500, detail=str(exc)) from exc
-        else:
-            # After snippet_alias is stored, automatically trigger RAG ingest when configured.
-            if (
-                payload.action_type == ActionType.SNIPPET_ALIAS
-                and payload.status == ActionStatus.SUCCESS
-                and payload.rag_workspace_id is not None
-            ):
-                try:
-                    await ingest_snippet_rag_from_db(
-                        table_id=payload.table_id,
-                        version_ts=payload.version_ts,
-                        workspace_id=payload.rag_workspace_id,
-                        rag_item_type=payload.rag_item_type or "SNIPPET",
-                        client=client,
-                    )
-                except Exception:
-                    logger.exception(
-                        "Failed to ingest snippet RAG artifacts after snippet_alias upsert",
-                        extra={
-                            "table_id": payload.table_id,
-                            "version_ts": payload.version_ts,
-                            "workspace_id": payload.rag_workspace_id,
-                        },
-                    )
-
-        return response
-
-    @application.post(
-        "/v1/table/snippet/rag_ingest",
-        response_model=TableSnippetRagIngestResponse,
-        summary="Merge snippet+alias results from action_results and ingest into RAG.",
-    )
-    async def ingest_snippet_rag(
-        payload: TableSnippetRagIngestRequest,
-        client: httpx.AsyncClient = Depends(get_http_client),
-    ) -> TableSnippetRagIngestResponse:
-        try:
-            rag_item_ids = await ingest_snippet_rag_from_db(
-                table_id=payload.table_id,
-                version_ts=payload.version_ts,
-                workspace_id=payload.workspace_id,
-                rag_item_type=payload.rag_item_type or "SNIPPET",
-                client=client,
-            )
-        except Exception as exc:
-            logger.exception(
-                "Failed to ingest snippet RAG artifacts",
-                extra={
-                    "table_id": payload.table_id,
-                    "version_ts": payload.version_ts,
-                    "workspace_id": payload.workspace_id,
-                },
-            )
-            raise HTTPException(status_code=500, detail=str(exc)) from exc
-
-        return TableSnippetRagIngestResponse(rag_item_ids=rag_item_ids)
-
     @application.post("/__mock__/import-callback")
     async def mock_import_callback(payload: dict[str, Any]) -> dict[str, str]:
         logger.info("Received import analysis callback: %s", payload)
 
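The removed `/v1/table/profiling` handler acknowledges with 202 and then runs the job via `asyncio.create_task`. A minimal sketch of that accept-then-process pattern; endpoint name and the `sleep` stand-in are illustrative, and the task-reference bookkeeping is a general asyncio caveat rather than something this codebase does:

```python
# Fire-and-forget job submission: 202 now, work later.
import asyncio

from fastapi import FastAPI

app = FastAPI()


@app.post("/v1/jobs", status_code=202)
async def submit_job(payload: dict) -> dict:
    async def _runner() -> None:
        await asyncio.sleep(1)  # stand-in for the real job coroutine

    # Keep a reference so the task is not garbage-collected mid-flight.
    task = asyncio.create_task(_runner())
    app.state.background_tasks = getattr(app.state, "background_tasks", set())
    app.state.background_tasks.add(task)
    task.add_done_callback(app.state.background_tasks.discard)
    return {"status": "accepted"}
```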
app/models.py (237 changed lines)

@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-from datetime import datetime
 from enum import Enum
 from typing import Any, Dict, List, Optional, Union
 

@@ -77,8 +76,8 @@ class DataImportAnalysisRequest(BaseModel):
         description="Ordered list of table headers associated with the data.",
     )
     llm_model: str = Field(
-        None,
-        description="Model identifier. Accepts 'provider:model_name' format or custom model alias.",
+        ...,
+        description="Model identifier. Accepts 'provider:model' format or plain model name.",
     )
     temperature: Optional[float] = Field(
         None,
@@ -136,235 +135,3 @@ class DataImportAnalysisJobRequest(BaseModel):
 class DataImportAnalysisJobAck(BaseModel):
     import_record_id: str = Field(..., description="Echo of the import record identifier")
     status: str = Field("accepted", description="Processing status acknowledgement.")
-
-
-class ActionType(str, Enum):
-    GE_PROFILING = "ge_profiling"
-    GE_RESULT_DESC = "ge_result_desc"
-    SNIPPET = "snippet"
-    SNIPPET_ALIAS = "snippet_alias"
-
-
-class ActionStatus(str, Enum):
-    PENDING = "pending"
-    RUNNING = "running"
-    SUCCESS = "success"
-    FAILED = "failed"
-    PARTIAL = "partial"
-
-
-class TableProfilingJobRequest(BaseModel):
-    table_id: str = Field(..., description="Unique identifier for the table to profile.")
-    version_ts: str = Field(
-        ...,
-        pattern=r"^\d{14}$",
-        description="Version timestamp expressed as fourteen digit string (yyyyMMddHHmmss).",
-    )
-    callback_url: HttpUrl = Field(
-        ...,
-        description="Callback endpoint invoked after each pipeline action completes.",
-    )
-    llm_model: Optional[str] = Field(
-        None,
-        description="Default LLM model spec applied to prompt-based actions when overrides are omitted.",
-    )
-    table_schema: Optional[Any] = Field(
-        None,
-        description="Schema structure snapshot for the current table version.",
-    )
-    table_schema_version_id: Optional[str] = Field(
-        None,
-        description="Identifier for the schema snapshot provided in table_schema.",
-    )
-    table_link_info: Optional[Dict[str, Any]] = Field(
-        None,
-        description=(
-            "Information describing how to locate the source table for profiling. "
-            "For example: {'type': 'sql', 'connection_string': 'mysql+pymysql://user:pass@host/db', "
-            "'table': 'schema.table_name'}."
-        ),
-    )
-    table_access_info: Optional[Dict[str, Any]] = Field(
-        None,
-        description=(
-            "Credentials or supplemental parameters required to access the table described in table_link_info. "
-            "These values can be merged into the connection string using Python format placeholders."
-        ),
-    )
-    ge_batch_request: Optional[Dict[str, Any]] = Field(
-        None,
-        description="Optional Great Expectations batch request payload used for profiling.",
-    )
-    ge_expectation_suite_name: Optional[str] = Field(
-        None,
-        description="Expectation suite name used during profiling. Created automatically when absent.",
-    )
-    ge_data_context_root: Optional[str] = Field(
-        None,
-        description="Custom root directory for the Great Expectations data context. Defaults to project ./gx.",
-    )
-    ge_datasource_name: Optional[str] = Field(
-        None,
-        description="Datasource name registered inside the GE context when batch_request is not supplied.",
-    )
-    ge_data_asset_name: Optional[str] = Field(
-        None,
-        description="Data asset reference used when inferring batch request from datasource configuration.",
-    )
-    ge_profiler_type: str = Field(
-        "user_configurable",
-        description="Profiler implementation identifier. Currently supports 'user_configurable' or 'data_assistant'.",
-    )
-
-    result_desc_model: Optional[str] = Field(
-        None,
-        description="LLM model override used for GE result description (action 2).",
-    )
-    snippet_model: Optional[str] = Field(
-        None,
-        description="LLM model override used for snippet generation (action 3).",
-    )
-    snippet_alias_model: Optional[str] = Field(
-        None,
-        description="LLM model override used for snippet alias enrichment (action 4).",
-    )
-    extra_options: Optional[Dict[str, Any]] = Field(
-        None,
-        description="Miscellaneous execution flags applied across pipeline steps.",
-    )
-    workspace_id: Optional[int] = Field(
-        None,
-        ge=0,
-        description="Optional workspace identifier forwarded to snippet_alias callback for RAG ingestion.",
-    )
-    rag_item_type: Optional[str] = Field(
-        "SNIPPET",
-        description="Optional RAG item type forwarded to snippet_alias callback.",
-    )
-
-
-class TableProfilingJobAck(BaseModel):
-    table_id: str = Field(..., description="Echo of the table identifier.")
-    version_ts: str = Field(..., description="Echo of the profiling version timestamp (yyyyMMddHHmmss).")
-    status: str = Field("accepted", description="Processing acknowledgement status.")
-
-
-class TableSnippetUpsertRequest(BaseModel):
-    table_id: int = Field(..., ge=1, description="Unique identifier for the table.")
-    version_ts: int = Field(
-        ...,
-        ge=0,
-        description="Version timestamp aligned with the pipeline (yyyyMMddHHmmss as integer).",
-    )
-    workspace_id: Optional[int] = Field(
-        None,
-        ge=0,
-        description="Optional workspace identifier for RAG ingestion; when provided and action_type=snippet_alias "
-        "with status=success, merged snippets will be written to rag_snippet and pushed to RAG.",
-    )
-    rag_item_type: Optional[str] = Field(
-        "SNIPPET",
-        description="Optional RAG item type used when pushing snippets to RAG. Defaults to 'SNIPPET'.",
-    )
-    action_type: ActionType = Field(..., description="Pipeline action type for this record.")
-    status: ActionStatus = Field(
-        ActionStatus.SUCCESS, description="Execution status for the action."
-    )
-    callback_url: HttpUrl = Field(..., description="Callback URL associated with the action run.")
-    table_schema_version_id: int = Field(..., ge=0, description="Identifier for the schema snapshot.")
-    table_schema: Any = Field(..., description="Schema snapshot payload for the table.")
-    model: Optional[str] = Field(
-        None,
-        description="LLM model identifier (can be provider alias) used for this action, when applicable.",
-    )
-    model_provider: Optional[str] = Field(
-        None,
-        description="LLM provider responsible for executing the action's model.",
-    )
-    model_params: Optional[Dict[str, Any]] = Field(
-        None,
-        description="Optional model parameter overrides (e.g., temperature) associated with the action.",
-    )
-    llm_usage: Optional[Any] = Field(
-        None,
-        description="Optional token usage metrics reported by the LLM provider.",
-    )
-    ge_profiling_json: Optional[Any] = Field(
-        None, description="Full GE profiling result payload for the profiling action."
-    )
-    ge_profiling_json_size_bytes: Optional[int] = Field(
-        None, ge=0, description="Size in bytes of the GE profiling result JSON."
-    )
-    ge_profiling_summary: Optional[Any] = Field(
-        None, description="Sanitised GE profiling summary payload."
-    )
-    ge_profiling_summary_size_bytes: Optional[int] = Field(
-        None, ge=0, description="Size in bytes of the GE profiling summary JSON."
-    )
-    ge_profiling_total_size_bytes: Optional[int] = Field(
-        None, ge=0, description="Combined size (bytes) of profiling result + summary."
-    )
-    ge_profiling_html_report_url: Optional[str] = Field(
-        None, description="Optional URL to the generated GE profiling HTML report."
-    )
-    ge_result_desc_json: Optional[Any] = Field(
-        None, description="Result JSON for the GE result description action."
-    )
-    ge_result_desc_json_size_bytes: Optional[int] = Field(
-        None, ge=0, description="Size in bytes of the GE result description JSON."
-    )
-    snippet_json: Optional[Any] = Field(
-        None, description="Snippet generation action result JSON."
-    )
-    snippet_json_size_bytes: Optional[int] = Field(
-        None, ge=0, description="Size in bytes of the snippet result JSON."
-    )
-    snippet_alias_json: Optional[Any] = Field(
-        None, description="Snippet alias expansion result JSON."
-    )
-    snippet_alias_json_size_bytes: Optional[int] = Field(
-        None, ge=0, description="Size in bytes of the snippet alias result JSON."
-    )
-    error_code: Optional[str] = Field(None, description="Optional error code when status indicates a failure.")
-    error_message: Optional[str] = Field(None, description="Optional error message when status indicates a failure.")
-    started_at: Optional[datetime] = Field(
-        None, description="Timestamp when the action started executing."
-    )
-    finished_at: Optional[datetime] = Field(
-        None, description="Timestamp when the action finished executing."
-    )
-    duration_ms: Optional[int] = Field(
-        None,
-        ge=0,
-        description="Optional execution duration in milliseconds.",
-    )
-
-
-class TableSnippetRagIngestRequest(BaseModel):
-    table_id: int = Field(..., ge=1, description="Unique identifier for the table.")
-    version_ts: int = Field(
-        ...,
-        ge=0,
-        description="Version timestamp aligned with the pipeline (yyyyMMddHHmmss as integer).",
-    )
-    workspace_id: int = Field(..., ge=0, description="Workspace id used when pushing snippets to RAG.")
-    rag_item_type: Optional[str] = Field(
-        "SNIPPET",
-        description="Optional RAG item type used when pushing snippets to RAG. Defaults to 'SNIPPET'.",
-    )
-
-
-class TableSnippetRagIngestResponse(BaseModel):
-    rag_item_ids: List[int] = Field(..., description="List of ingested rag_item_ids.")
-    result_checksum: Optional[str] = Field(
-        None,
-        description="Optional checksum for the result payload (e.g., MD5).",
-    )
-
-
-class TableSnippetUpsertResponse(BaseModel):
-    table_id: int
-    version_ts: int
-    action_type: ActionType
-    status: ActionStatus
-    updated: bool
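One detail worth noting from the removed models: `version_ts` was validated with `pattern=r"^\d{14}$"`, i.e. a fourteen-digit yyyyMMddHHmmss string. A quick pydantic v2 sketch of that constraint, using a hypothetical stand-in model:

```python
# Demonstrates the fourteen-digit timestamp constraint used by the
# deleted TableProfilingJobRequest (stand-in model, pydantic v2).
from pydantic import BaseModel, Field, ValidationError


class VersionedJob(BaseModel):
    version_ts: str = Field(..., pattern=r"^\d{14}$")


VersionedJob(version_ts="20240131235959")  # accepted

try:
    VersionedJob(version_ts="2024-01-31")  # rejected: wrong shape
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # "string_pattern_mismatch" in pydantic v2
```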
app/routers/__init__.py (file deleted)

@@ -1,4 +0,0 @@
-from .chat import router as chat_router
-from .metrics import router as metrics_router
-
-__all__ = ["chat_router", "metrics_router"]
app/routers/chat.py (file deleted)

@@ -1,102 +0,0 @@
-from __future__ import annotations
-
-from datetime import datetime
-from typing import Any, List, Optional
-
-from fastapi import APIRouter, HTTPException, Query
-
-from app.schemas.chat import (
-    ChatSessionCreate,
-    ChatSessionUpdate,
-    ChatTurnCreate,
-    ChatTurnRetrievalBatch,
-)
-from app.services import metric_store
-
-
-router = APIRouter(prefix="/api/v1/chat", tags=["chat"])
-
-
-@router.post("/sessions")
-def create_session(payload: ChatSessionCreate) -> Any:
-    """Create a chat session."""
-    return metric_store.create_chat_session(payload)
-
-
-@router.post("/sessions/{session_id}/update")
-def update_session(session_id: int, payload: ChatSessionUpdate) -> Any:
-    try:
-        return metric_store.update_chat_session(session_id, payload)
-    except KeyError:
-        raise HTTPException(status_code=404, detail="Session not found")
-
-
-@router.post("/sessions/{session_id}/close")
-def close_session(session_id: int) -> Any:
-    """Close a chat session and stamp end_time."""
-    try:
-        return metric_store.close_chat_session(session_id)
-    except KeyError:
-        raise HTTPException(status_code=404, detail="Session not found")
-
-
-@router.get("/sessions/{session_id}")
-def get_session(session_id: int) -> Any:
-    """Fetch one session."""
-    session = metric_store.get_chat_session(session_id)
-    if not session:
-        raise HTTPException(status_code=404, detail="Session not found")
-    return session
-
-
-@router.get("/sessions")
-def list_sessions(
-    user_id: Optional[int] = None,
-    status: Optional[str] = None,
-    start_from: Optional[datetime] = Query(None, description="Filter by start time lower bound."),
-    start_to: Optional[datetime] = Query(None, description="Filter by start time upper bound."),
-    limit: int = Query(50, ge=1, le=500),
-    offset: int = Query(0, ge=0),
-) -> List[Any]:
-    return metric_store.list_chat_sessions(
-        user_id=user_id,
-        status=status,
-        start_from=start_from,
-        start_to=start_to,
-        limit=limit,
-        offset=offset,
-    )
-
-
-@router.post("/sessions/{session_id}/turns")
-def create_turn(session_id: int, payload: ChatTurnCreate) -> Any:
-    """Create a turn under a session."""
-    try:
-        return metric_store.create_chat_turn(session_id, payload)
-    except Exception as exc:
-        raise HTTPException(status_code=400, detail=str(exc)) from exc
-
-
-@router.get("/sessions/{session_id}/turns")
-def list_turns(session_id: int) -> List[Any]:
-    return metric_store.list_chat_turns(session_id)
-
-
-@router.get("/turns/{turn_id}")
-def get_turn(turn_id: int) -> Any:
-    turn = metric_store.get_chat_turn(turn_id)
-    if not turn:
-        raise HTTPException(status_code=404, detail="Turn not found")
-    return turn
-
-
-@router.post("/turns/{turn_id}/retrievals")
-def write_retrievals(turn_id: int, payload: ChatTurnRetrievalBatch) -> Any:
-    """Batch write retrieval records for a turn."""
-    count = metric_store.create_retrievals(turn_id, payload.retrievals)
-    return {"turn_id": turn_id, "inserted": count}
-
-
-@router.get("/turns/{turn_id}/retrievals")
-def list_retrievals(turn_id: int) -> List[Any]:
-    return metric_store.list_retrievals(turn_id)
app/routers/metrics.py (file deleted)

@@ -1,166 +0,0 @@
-from __future__ import annotations
-
-from datetime import datetime
-from typing import Any, List, Optional
-
-from fastapi import APIRouter, HTTPException, Query
-
-from app.schemas.metrics import (
-    MetricCreate,
-    MetricResultsWriteRequest,
-    MetricRunTrigger,
-    MetricScheduleCreate,
-    MetricScheduleUpdate,
-    MetricUpdate,
-)
-from app.services import metric_store
-
-
-router = APIRouter(prefix="/api/v1", tags=["metrics"])
-
-
-@router.post("/metrics")
-def create_metric(payload: MetricCreate) -> Any:
-    """Create a metric definition."""
-    try:
-        return metric_store.create_metric(payload)
-    except Exception as exc:
-        raise HTTPException(status_code=400, detail=str(exc)) from exc
-
-
-@router.post("/metrics/{metric_id}")
-def update_metric(metric_id: int, payload: MetricUpdate) -> Any:
-    """Update fields of a metric definition."""
-    try:
-        return metric_store.update_metric(metric_id, payload)
-    except KeyError:
-        raise HTTPException(status_code=404, detail="Metric not found")
-    except Exception as exc:
-        raise HTTPException(status_code=400, detail=str(exc)) from exc
-
-
-@router.get("/metrics/{metric_id}")
-def get_metric(metric_id: int) -> Any:
-    """Fetch a metric definition by id."""
-    metric = metric_store.get_metric(metric_id)
-    if not metric:
-        raise HTTPException(status_code=404, detail="Metric not found")
-    return metric
-
-
-@router.get("/metrics")
-def list_metrics(
-    biz_domain: Optional[str] = None,
-    is_active: Optional[bool] = None,
-    keyword: Optional[str] = Query(None, description="Search by code/name"),
-    limit: int = Query(100, ge=1, le=500),
-    offset: int = Query(0, ge=0),
-) -> List[Any]:
-    """List metrics with optional filters."""
-    return metric_store.list_metrics(
-        biz_domain=biz_domain,
-        is_active=is_active,
-        keyword=keyword,
-        limit=limit,
-        offset=offset,
-    )
-
-
-@router.post("/metric-schedules")
-def create_schedule(payload: MetricScheduleCreate) -> Any:
-    """Create a metric schedule."""
-    try:
-        return metric_store.create_metric_schedule(payload)
-    except Exception as exc:
-        raise HTTPException(status_code=400, detail=str(exc)) from exc
-
-
-@router.post("/metric-schedules/{schedule_id}")
-def update_schedule(schedule_id: int, payload: MetricScheduleUpdate) -> Any:
-    """Update a metric schedule."""
-    try:
-        return metric_store.update_metric_schedule(schedule_id, payload)
-    except KeyError:
-        raise HTTPException(status_code=404, detail="Schedule not found")
-    except Exception as exc:
-        raise HTTPException(status_code=400, detail=str(exc)) from exc
-
-
-@router.get("/metrics/{metric_id}/schedules")
-def list_schedules(metric_id: int) -> List[Any]:
-    """List schedules for one metric."""
-    return metric_store.list_schedules_for_metric(metric_id=metric_id)
-
-
-@router.post("/metric-runs/trigger")
-def trigger_run(payload: MetricRunTrigger) -> Any:
-    """Insert a run record (execution handled externally)."""
-    try:
-        return metric_store.trigger_metric_run(payload)
-    except KeyError as exc:
-        raise HTTPException(status_code=404, detail=str(exc)) from exc
-    except Exception as exc:
-        raise HTTPException(status_code=400, detail=str(exc)) from exc
-
-
-@router.get("/metric-runs")
-def list_runs(
-    metric_id: Optional[int] = None,
-    status: Optional[str] = None,
-    limit: int = Query(100, ge=1, le=500),
-    offset: int = Query(0, ge=0),
-) -> List[Any]:
-    """List run records."""
-    return metric_store.list_metric_runs(
-        metric_id=metric_id, status=status, limit=limit, offset=offset
-    )
-
-
-@router.get("/metric-runs/{run_id}")
-def get_run(run_id: int) -> Any:
-    """Fetch run details."""
-    run = metric_store.get_metric_run(run_id)
-    if not run:
-        raise HTTPException(status_code=404, detail="Run not found")
-    return run
-
-
-@router.post("/metric-results/{metric_id}")
-def write_results(metric_id: int, payload: MetricResultsWriteRequest) -> Any:
-    # Align path metric_id with payload to avoid mismatch.
-    if payload.metric_id != metric_id:
-        raise HTTPException(status_code=400, detail="metric_id in path/body mismatch")
-    try:
-        inserted = metric_store.write_metric_results(payload)
-    except KeyError as exc:
-        raise HTTPException(status_code=404, detail=str(exc)) from exc
-    except Exception as exc:
-        raise HTTPException(status_code=400, detail=str(exc)) from exc
-    return {"metric_id": metric_id, "inserted": inserted}
-
-
-@router.get("/metric-results")
-def query_results(
-    metric_id: int,
-    stat_from: Optional[datetime] = None,
-    stat_to: Optional[datetime] = None,
-    limit: int = Query(200, ge=1, le=1000),
-    offset: int = Query(0, ge=0),
-) -> List[Any]:
-    """Query metric results by time range."""
-    return metric_store.query_metric_results(
-        metric_id=metric_id,
-        stat_from=stat_from,
-        stat_to=stat_to,
-        limit=limit,
-        offset=offset,
-    )
-
-
-@router.get("/metric-results/latest")
-def latest_result(metric_id: int) -> Any:
-    """Fetch the latest metric result."""
-    result = metric_store.latest_metric_result(metric_id)
-    if not result:
-        raise HTTPException(status_code=404, detail="Metric result not found")
-    return result
app/schemas/chat.py (file deleted)

@@ -1,53 +0,0 @@
-from __future__ import annotations
-
-from datetime import datetime
-from typing import Any, List, Optional
-
-from pydantic import BaseModel, Field
-
-
-class ChatSessionCreate(BaseModel):
-    """Create a chat session to group multiple turns for a user."""
-    user_id: int = Field(..., description="User ID owning the session.")
-    session_uuid: Optional[str] = Field(None, description="Optional externally provided UUID.")
-    status: Optional[str] = Field("OPEN", description="Session status, default OPEN.")
-    end_time: Optional[datetime] = Field(None, description="Optional end time.")
-    ext_context: Optional[dict[str, Any]] = Field(None, description="Arbitrary business context.")
-
-
-class ChatSessionUpdate(BaseModel):
-    """Partial update for a chat session."""
-    status: Optional[str] = Field(None, description="New session status.")
-    end_time: Optional[datetime] = Field(None, description="Close time override.")
-    last_turn_id: Optional[int] = Field(None, description="Pointer to last chat turn.")
-    ext_context: Optional[dict[str, Any]] = Field(None, description="Context patch.")
-
-
-class ChatTurnCreate(BaseModel):
-    """Create a single chat turn with intent/SQL context."""
-    user_id: int = Field(..., description="User ID for this turn.")
-    user_query: str = Field(..., description="Raw user query content.")
-    intent: Optional[str] = Field(None, description="Intent tag such as METRIC_QUERY.")
-    ast_json: Optional[dict[str, Any]] = Field(None, description="Parsed AST payload.")
-    generated_sql: Optional[str] = Field(None, description="Final SQL text, if generated.")
-    sql_status: Optional[str] = Field(None, description="SQL generation/execution status.")
-    error_msg: Optional[str] = Field(None, description="Error message when SQL failed.")
-    main_metric_ids: Optional[List[int]] = Field(None, description="Metric IDs referenced in this turn.")
-    created_metric_ids: Optional[List[int]] = Field(None, description="Metric IDs created in this turn.")
-    end_time: Optional[datetime] = Field(None, description="Turn end time.")
-
-
-class ChatTurnRetrievalItem(BaseModel):
-    """Record of one retrieved item contributing to a turn."""
-    item_type: str = Field(..., description="METRIC/SNIPPET/CHAT etc.")
-    item_id: str = Field(..., description="Identifier such as metric_id or snippet_id.")
-    item_extra: Optional[dict[str, Any]] = Field(None, description="Additional context like column name.")
-    similarity_score: Optional[float] = Field(None, description="Similarity score.")
-    rank_no: Optional[int] = Field(None, description="Ranking position.")
-    used_in_reasoning: Optional[bool] = Field(False, description="Flag if used in reasoning.")
-    used_in_sql: Optional[bool] = Field(False, description="Flag if used in final SQL.")
-
-
-class ChatTurnRetrievalBatch(BaseModel):
-    """Batch insert wrapper for retrieval records."""
-    retrievals: List[ChatTurnRetrievalItem]
app/schemas/metrics.py (file deleted)

@@ -1,99 +0,0 @@
-from __future__ import annotations
-
-from datetime import datetime
-from typing import Any, List, Optional
-
-from pydantic import BaseModel, Field
-
-
-class MetricCreate(BaseModel):
-    """Create a metric definition with business and technical metadata."""
-    metric_code: str = Field(..., description="Internal metric code, unique.")
-    metric_name: str = Field(..., description="Display name.")
-    metric_aliases: Optional[List[str]] = Field(None, description="Optional alias list.")
-    biz_domain: str = Field(..., description="Business domain identifier.")
-    biz_desc: Optional[str] = Field(None, description="Business definition.")
-    chat_turn_id: Optional[int] = Field(None, description="Source chat turn ID.")
-    tech_desc: Optional[str] = Field(None, description="Technical definition.")
-    formula_expr: Optional[str] = Field(None, description="Formula expression text.")
-    base_sql: str = Field(..., description="Canonical SQL used to compute the metric.")
-    time_grain: str = Field(..., description="DAY/HOUR/WEEK/MONTH etc.")
-    dim_binding: List[str] = Field(..., description="Dimension columns bound to the metric.")
-    update_strategy: str = Field(..., description="FULL/INCR/REALTIME.")
-    schedule_id: Optional[int] = Field(None, description="Linked schedule id if any.")
-    schedule_type: Optional[int] = Field(None, description="Scheduler type identifier.")
-    is_active: bool = Field(True, description="Whether the metric is enabled.")
-    created_by: Optional[int] = Field(None, description="Creator user id.")
-    updated_by: Optional[int] = Field(None, description="Updater user id.")
-
-
-class MetricUpdate(BaseModel):
-    """Partial update for an existing metric definition."""
-    metric_name: Optional[str] = None
-    metric_aliases: Optional[List[str]] = None
-    biz_domain: Optional[str] = None
-    biz_desc: Optional[str] = None
-    tech_desc: Optional[str] = None
-    formula_expr: Optional[str] = None
-    base_sql: Optional[str] = None
-    time_grain: Optional[str] = None
-    dim_binding: Optional[List[str]] = None
-    update_strategy: Optional[str] = None
-    schedule_id: Optional[int] = None
-    schedule_type: Optional[int] = None
-    is_active: Optional[bool] = None
-    updated_by: Optional[int] = None
-
-
-class MetricScheduleCreate(BaseModel):
-    """Create a cron-based schedule for a metric."""
-    metric_id: int
-    cron_expr: str
-    enabled: bool = True
-    priority: int = 10
-    backfill_allowed: bool = True
-    max_runtime_sec: Optional[int] = None
-    retry_times: int = 0
-    owner_team: Optional[str] = None
-    owner_user_id: Optional[int] = None
-
-
-class MetricScheduleUpdate(BaseModel):
-    """Update fields of an existing metric schedule."""
-    cron_expr: Optional[str] = None
-    enabled: Optional[bool] = None
-    priority: Optional[int] = None
-    backfill_allowed: Optional[bool] = None
-    max_runtime_sec: Optional[int] = None
-    retry_times: Optional[int] = None
-    owner_team: Optional[str] = None
-    owner_user_id: Optional[int] = None
-
-
-class MetricRunTrigger(BaseModel):
-    """Trigger a metric run, optionally linking to a chat turn or schedule."""
-    metric_id: int
-    schedule_id: Optional[int] = None
-    source_turn_id: Optional[int] = None
-    data_time_from: Optional[datetime] = None
-    data_time_to: Optional[datetime] = None
-    metric_version: Optional[int] = None
-    base_sql_snapshot: Optional[str] = None
-    triggered_by: str = Field("API", description="SCHEDULER/MANUAL/API/QA_TURN")
-    triggered_at: Optional[datetime] = None
-
-
-class MetricResultItem(BaseModel):
-    """Single metric result row to be persisted."""
-    stat_time: datetime
-    metric_value: float
-    metric_version: Optional[int] = None
-    extra_dims: Optional[dict[str, Any]] = None
-    load_time: Optional[datetime] = None
-    data_version: Optional[int] = None
-
-
-class MetricResultsWriteRequest(BaseModel):
-    """Batch write request for metric results."""
-    metric_id: int
-    results: List[MetricResultItem]
app/schemas/rag.py (file deleted; file name inferred from its contents)

@@ -1,46 +0,0 @@
-from __future__ import annotations
-
-from typing import Any, List
-
-from pydantic import BaseModel, ConfigDict, Field
-
-
-class RagItemPayload(BaseModel):
-    """Payload for creating or updating a single RAG item."""
-
-    model_config = ConfigDict(populate_by_name=True, extra="ignore")
-
-    id: int = Field(..., description="Unique identifier for the RAG item.")
-    workspace_id: int = Field(..., alias="workspaceId", description="Workspace identifier.")
-    name: str = Field(..., description="Readable name of the item.")
-    embedding_data: str = Field(..., alias="embeddingData", description="Serialized embedding payload.")
-    type: str = Field(..., description='Item type, e.g. "METRIC".')
-
-
-class RagDeleteRequest(BaseModel):
-    """Payload for deleting a single RAG item."""
-
-    model_config = ConfigDict(populate_by_name=True, extra="ignore")
-
-    id: int = Field(..., description="Identifier of the item to delete.")
-    type: str = Field(..., description="Item type matching the stored record.")
-
-
-class RagRetrieveRequest(BaseModel):
-    """Payload for retrieving RAG items by semantic query."""
-
-    model_config = ConfigDict(populate_by_name=True, extra="ignore")
-
-    query: str = Field(..., description="Search query text.")
-    num: int = Field(..., description="Number of items to return.")
-    workspace_id: int = Field(..., alias="workspaceId", description="Workspace scope for the search.")
-    type: str = Field(..., description="Item type to search, e.g. METRIC.")
-
-
-class RagRetrieveResponse(BaseModel):
-    """Generic RAG retrieval response wrapper."""
-
-    model_config = ConfigDict(extra="allow")
-
-    data: List[Any] = Field(default_factory=list, description="Retrieved items.")
app/services/__init__.py

@@ -1,4 +1,3 @@
 from .gateway import LLMGateway
-from .rag_client import RagAPIClient
 
-__all__ = ["LLMGateway", "RagAPIClient"]
+__all__ = ["LLMGateway"]
app/services/gateway.py

@@ -1,93 +1,53 @@
 from __future__ import annotations
 
-import logging
+import os
+from typing import Dict, Type
 
 import httpx
-from pydantic import ValidationError
 
-from app.exceptions import ProviderAPICallError
-from app.models import LLMChoice, LLMMessage, LLMRequest, LLMResponse
-from app.settings import NEW_API_AUTH_TOKEN, NEW_API_BASE_URL
-
-
-logger = logging.getLogger(__name__)
+from app.exceptions import ProviderConfigurationError
+from app.models import LLMProvider, LLMRequest, LLMResponse
+from app.providers import (
+    AnthropicProvider,
+    DeepSeekProvider,
+    GeminiProvider,
+    LLMProviderClient,
+    OpenAIProvider,
+    OpenRouterProvider,
+    QwenProvider,
+)
 
 
 class LLMGateway:
-    """Forward chat requests to the configured new-api component."""
+    """Simple registry that dispatches chat requests to provider clients."""
 
-    def __init__(
-        self,
-        *,
-        base_url: str | None = None,
-        auth_token: str | None = None,
-    ) -> None:
-        resolved_base = base_url or NEW_API_BASE_URL
-        self._base_url = resolved_base.rstrip("/")
-        self._auth_token = auth_token or NEW_API_AUTH_TOKEN
+    def __init__(self) -> None:
+        self._providers: Dict[LLMProvider, LLMProviderClient] = {}
+        self._factory: Dict[LLMProvider, Type[LLMProviderClient]] = {
+            LLMProvider.OPENAI: OpenAIProvider,
+            LLMProvider.ANTHROPIC: AnthropicProvider,
+            LLMProvider.OPENROUTER: OpenRouterProvider,
+            LLMProvider.GEMINI: GeminiProvider,
+            LLMProvider.QWEN: QwenProvider,
+            LLMProvider.DEEPSEEK: DeepSeekProvider,
+        }
+
+    def get_provider(self, provider: LLMProvider) -> LLMProviderClient:
+        if provider not in self._factory:
+            raise ProviderConfigurationError(f"Unsupported provider '{provider.value}'.")
+
+        if provider not in self._providers:
+            self._providers[provider] = self._build_provider(provider)
+        return self._providers[provider]
+
+    def _build_provider(self, provider: LLMProvider) -> LLMProviderClient:
+        provider_cls = self._factory[provider]
+        api_key_env = getattr(provider_cls, "api_key_env", None)
+        api_key = os.getenv(api_key_env) if api_key_env else None
+        return provider_cls(api_key)
 
     async def chat(
         self, request: LLMRequest, client: httpx.AsyncClient
     ) -> LLMResponse:
-        url = f"{self._base_url}/v1/chat/completions"
-        payload = request.model_dump(mode="json", exclude_none=True)
-        headers = {"Content-Type": "application/json"}
-        if self._auth_token:
-            headers["Authorization"] = f"Bearer {self._auth_token}"
-        logger.info("Forwarding chat request to new-api at %s", url)
-        try:
-            response = await client.post(url, json=payload, headers=headers)
-            response.raise_for_status()
-        except httpx.HTTPStatusError as exc:
-            status_code = exc.response.status_code if exc.response else None
-            response_text = exc.response.text if exc.response else ""
-            logger.error(
-                "new-api upstream returned %s: %s",
-                status_code,
-                response_text,
-                exc_info=True,
-            )
-            raise ProviderAPICallError(
-                "Chat completion request failed.",
-                status_code=status_code,
-                response_text=response_text,
-            ) from exc
-        except httpx.HTTPError as exc:
-            logger.error("new-api transport error: %s", exc, exc_info=True)
-            raise ProviderAPICallError(f"Chat completion request failed: {exc}") from exc
-
-        try:
-            data = response.json()
-        except ValueError as exc:
-            logger.error("new-api responded with invalid JSON.", exc_info=True)
-            raise ProviderAPICallError(
-                "Chat completion response was not valid JSON."
-            ) from exc
-
-        logger.info("new-api payload: %s", data)
-        normalized_choices: list[LLMChoice] = []
-        for idx, choice in enumerate(data.get("choices", []) or []):
-            message_payload = choice.get("message") or {}
-            message = LLMMessage(
-                role=message_payload.get("role", "assistant"),
-                content=message_payload.get("content", ""),
-            )
-            normalized_choices.append(
-                LLMChoice(index=choice.get("index", idx), message=message)
-            )
-
-        try:
-            normalized_response = LLMResponse(
-                provider=request.provider,
-                model=data.get("model", request.model),
-                choices=normalized_choices,
-                raw=data,
-            )
-            return normalized_response
-        except ValidationError as exc:
-            logger.error(
-                "new-api response did not match expected schema: %s", data, exc_info=True
-            )
-            raise ProviderAPICallError(
-                "Chat completion response was not in the expected format."
-            ) from exc
+        provider_client = self.get_provider(request.provider)
+        return await provider_client.chat(request, client)
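The restored gateway resolves a provider client lazily on first use and caches it in `self._providers`, so each provider's API key is only read (via its `api_key_env` attribute) when that provider is actually requested. A usage sketch, under the assumption that `LLMRequest` takes `provider`, `model`, and `messages` fields (not confirmed by this diff):

```python
# Hedged usage sketch of the registry-style LLMGateway.
import asyncio

import httpx

from app.models import LLMProvider, LLMRequest
from app.services import LLMGateway


async def main() -> None:
    gateway = LLMGateway()
    request = LLMRequest(
        provider=LLMProvider.DEEPSEEK,
        model="deepseek-chat",
        messages=[{"role": "user", "content": "ping"}],
    )
    async with httpx.AsyncClient() as client:
        # First call instantiates DeepSeekProvider; later calls reuse the
        # cached instance from gateway's provider registry.
        response = await gateway.chat(request, client)
    print(response.choices[0].message.content)


asyncio.run(main())
```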
app/services/import_analysis.py

@@ -22,24 +22,13 @@ from app.models import (
     LLMResponse,
     LLMRole,
 )
-from app.settings import (
-    DEFAULT_IMPORT_MODEL,
-    NEW_API_AUTH_TOKEN,
-    NEW_API_BASE_URL,
-    get_supported_import_models,
-)
-from app.utils.llm_usage import extract_usage
+from app.settings import DEFAULT_IMPORT_MODEL, get_supported_import_models
 
 logger = logging.getLogger(__name__)
 
-IMPORT_GATEWAY_BASE_URL = os.getenv("IMPORT_GATEWAY_BASE_URL", NEW_API_BASE_URL)
-
-
-def build_import_gateway_headers() -> dict[str, str]:
-    headers = {"Content-Type": "application/json"}
-    if NEW_API_AUTH_TOKEN:
-        headers["Authorization"] = f"Bearer {NEW_API_AUTH_TOKEN}"
-    return headers
+IMPORT_GATEWAY_BASE_URL = os.getenv(
+    "IMPORT_GATEWAY_BASE_URL", "http://localhost:8000"
+)
 
 
 def _env_float(name: str, default: float) -> float:

@@ -53,7 +42,7 @@ def _env_float(name: str, default: float) -> float:
     return default
 
 
-IMPORT_CHAT_TIMEOUT_SECONDS = _env_float("IMPORT_CHAT_TIMEOUT_SECONDS", 120.0)
+IMPORT_CHAT_TIMEOUT_SECONDS = _env_float("IMPORT_CHAT_TIMEOUT_SECONDS", 90.0)
 
 SUPPORTED_IMPORT_MODELS = get_supported_import_models()
 

@@ -309,7 +298,7 @@ def parse_llm_analysis_json(llm_response: LLMResponse) -> Dict[str, Any]:
     try:
         return json.loads(json_payload)
     except json.JSONDecodeError as exc:
-        preview = json_payload[:10000]
+        preview = json_payload[:2000]
         logger.error("Failed to parse JSON from LLM response content: %s", preview, exc_info=True)
         raise ProviderAPICallError("LLM response JSON could not be parsed.") from exc
 

@@ -324,18 +313,16 @@ async def dispatch_import_analysis_job(
     url = f"{IMPORT_GATEWAY_BASE_URL.rstrip('/')}/v1/chat/completions"
 
     logger.info(
-        "Dispatching import %s to %s using provider=%s model=%s",
+        "Dispatching import %s to %s: %s",
         request.import_record_id,
         url,
-        payload.get("provider"),
-        payload.get("model"),
+        json.dumps(payload, ensure_ascii=False),
     )
 
     timeout = httpx.Timeout(IMPORT_CHAT_TIMEOUT_SECONDS)
-    headers = build_import_gateway_headers()
 
     try:
-        response = await client.post(url, json=payload, timeout=timeout, headers=headers)
+        response = await client.post(url, json=payload, timeout=timeout)
         response.raise_for_status()
     except httpx.HTTPStatusError as exc:
         body_preview = ""

@@ -360,10 +347,9 @@ async def dispatch_import_analysis_job(
         response.status_code,
     )
     logger.info(
-        "LLM response received for %s (status %s, choices=%s)",
+        "LLM response for %s: %s",
         request.import_record_id,
-        response.status_code,
-        len(response_data.get("choices") or []),
+        json.dumps(response_data, ensure_ascii=False),
    )
 
     try:

@@ -389,6 +375,18 @@ async def dispatch_import_analysis_job(
 
     return result
 
+
+# Normalize usage-field extraction across different model providers
+def extract_usage(resp_json: dict) -> dict:
+    usage = resp_json.get("usage") or resp_json.get("usageMetadata") or {}
+    return {
+        "prompt_tokens": usage.get("prompt_tokens") or usage.get("input_tokens") or usage.get("promptTokenCount"),
+        "completion_tokens": usage.get("completion_tokens") or usage.get("output_tokens") or usage.get("candidatesTokenCount"),
+        "total_tokens": usage.get("total_tokens") or usage.get("totalTokenCount") or (
+            (usage.get("prompt_tokens") or usage.get("input_tokens") or 0)
+            + (usage.get("completion_tokens") or usage.get("output_tokens") or 0)
+        ),
+    }
+
 async def notify_import_analysis_callback(
     callback_url: str,
     payload: Dict[str, Any],

@@ -417,7 +415,6 @@ async def process_import_analysis_job(
     request: DataImportAnalysisJobRequest,
     client: httpx.AsyncClient,
 ) -> None:
-    # Run the import analysis and ensure the callback fires regardless of success/failure.
     try:
         payload = await dispatch_import_analysis_job(request, client)
     except ProviderAPICallError as exc:
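The newly inlined `extract_usage` normalizes token accounting across OpenAI-style (`usage.prompt_tokens`), Anthropic-style (`usage.input_tokens`), and Gemini-style (`usageMetadata.promptTokenCount`) payloads, with a summed fallback when no total is reported. A quick check with invented sample payloads:

```python
# Sample payloads are invented for illustration; the key names follow the
# conventions extract_usage probes (OpenAI, Anthropic, Gemini).
openai_style = {"usage": {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46}}
anthropic_style = {"usage": {"input_tokens": 12, "output_tokens": 34}}
gemini_style = {"usageMetadata": {"promptTokenCount": 12, "candidatesTokenCount": 34, "totalTokenCount": 46}}

print(extract_usage(openai_style))     # {'prompt_tokens': 12, 'completion_tokens': 34, 'total_tokens': 46}
print(extract_usage(anthropic_style))  # total_tokens falls back to 12 + 34 = 46
print(extract_usage(gemini_style))     # camelCase Gemini fields map to the same keys
```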
@ -1,842 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.engine import Row
|
||||
|
||||
from app.db import get_engine
|
||||
from app.schemas.chat import (
|
||||
ChatSessionCreate,
|
||||
ChatSessionUpdate,
|
||||
ChatTurnCreate,
|
||||
ChatTurnRetrievalItem,
|
||||
)
|
||||
from app.schemas.metrics import (
|
||||
MetricCreate,
|
||||
MetricResultItem,
|
||||
MetricResultsWriteRequest,
|
||||
MetricRunTrigger,
|
||||
MetricScheduleCreate,
|
||||
MetricScheduleUpdate,
|
||||
MetricUpdate,
|
||||
)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Common helpers
|
||||
def _json_dump(value: Any) -> Optional[str]:
|
||||
"""Safe JSON dumper; returns None on failure to keep DB writes simple."""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
try:
|
||||
return json.dumps(value, ensure_ascii=False)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _parse_json_fields(payload: Dict[str, Any], fields: Iterable[str]) -> Dict[str, Any]:
|
||||
"""Parse select fields from JSON strings into dict/list for responses."""
|
||||
for field in fields:
|
||||
raw = payload.get(field)
|
||||
if raw is None or isinstance(raw, (dict, list)):
|
||||
continue
|
||||
if isinstance(raw, (bytes, bytearray)):
|
||||
raw = raw.decode("utf-8", errors="ignore")
|
||||
if isinstance(raw, str):
|
||||
try:
|
||||
payload[field] = json.loads(raw)
|
||||
except ValueError:
|
||||
pass
|
||||
return payload
|
||||
|
||||
|
||||
def _row_to_dict(row: Row[Any]) -> Dict[str, Any]:
|
    return dict(row._mapping)


# Chat sessions & turns
def create_chat_session(payload: ChatSessionCreate) -> Dict[str, Any]:
    """Create a chat session row with optional external UUID."""
    engine = get_engine()
    session_uuid = payload.session_uuid or str(uuid4())
    params = {
        "user_id": payload.user_id,
        "session_uuid": session_uuid,
        "end_time": payload.end_time,
        "status": payload.status or "OPEN",
        "ext_context": _json_dump(payload.ext_context),
    }
    with engine.begin() as conn:
        result = conn.execute(
            text(
                """
                INSERT INTO chat_session (user_id, session_uuid, end_time, status, ext_context)
                VALUES (:user_id, :session_uuid, :end_time, :status, :ext_context)
                """
            ),
            params,
        )
        session_id = result.lastrowid
        row = conn.execute(
            text("SELECT * FROM chat_session WHERE id=:id"), {"id": session_id}
        ).first()
    if not row:
        raise RuntimeError("Failed to create chat session.")
    data = _row_to_dict(row)
    _parse_json_fields(data, ["ext_context"])
    return data


def update_chat_session(session_id: int, payload: ChatSessionUpdate) -> Dict[str, Any]:
    """Patch selected chat session fields."""
    updates = {}
    if payload.status is not None:
        updates["status"] = payload.status
    if payload.end_time is not None:
        updates["end_time"] = payload.end_time
    if payload.last_turn_id is not None:
        updates["last_turn_id"] = payload.last_turn_id
    if payload.ext_context is not None:
        updates["ext_context"] = _json_dump(payload.ext_context)

    if not updates:
        current = get_chat_session(session_id)
        if not current:
            raise KeyError(f"Session {session_id} not found.")
        return current

    set_clause = ", ".join(f"{key}=:{key}" for key in updates.keys())
    params = dict(updates)
    params["id"] = session_id

    engine = get_engine()
    with engine.begin() as conn:
        conn.execute(
            text(f"UPDATE chat_session SET {set_clause} WHERE id=:id"),
            params,
        )
        row = conn.execute(
            text("SELECT * FROM chat_session WHERE id=:id"), {"id": session_id}
        ).first()
    if not row:
        raise KeyError(f"Session {session_id} not found.")
    data = _row_to_dict(row)
    _parse_json_fields(data, ["ext_context"])
    return data


def close_chat_session(session_id: int) -> Dict[str, Any]:
    """Mark a chat session as CLOSED with end_time."""
    now = datetime.utcnow()
    return update_chat_session(
        session_id,
        ChatSessionUpdate(status="CLOSED", end_time=now),
    )


def get_chat_session(session_id: int) -> Optional[Dict[str, Any]]:
    engine = get_engine()
    with engine.begin() as conn:
        row = conn.execute(
            text("SELECT * FROM chat_session WHERE id=:id"), {"id": session_id}
        ).first()
    if not row:
        return None
    data = _row_to_dict(row)
    _parse_json_fields(data, ["ext_context"])
    return data


def list_chat_sessions(
    *,
    user_id: Optional[int] = None,
    status: Optional[str] = None,
    start_from: Optional[datetime] = None,
    start_to: Optional[datetime] = None,
    limit: int = 50,
    offset: int = 0,
) -> List[Dict[str, Any]]:
    """List chat sessions with optional filters and pagination."""
    conditions = []
    params: Dict[str, Any] = {"limit": limit, "offset": offset}
    if user_id is not None:
        conditions.append("user_id=:user_id")
        params["user_id"] = user_id
    if status is not None:
        conditions.append("status=:status")
        params["status"] = status
    if start_from is not None:
        conditions.append("created_at>=:start_from")
        params["start_from"] = start_from
    if start_to is not None:
        conditions.append("created_at<=:start_to")
        params["start_to"] = start_to

    where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
    engine = get_engine()
    with engine.begin() as conn:
        rows = conn.execute(
            text(
                f"SELECT * FROM chat_session {where_clause} "
                "ORDER BY created_at DESC LIMIT :limit OFFSET :offset"
            ),
            params,
        ).fetchall()
    results: List[Dict[str, Any]] = []
    for row in rows:
        data = _row_to_dict(row)
        _parse_json_fields(data, ["ext_context"])
        results.append(data)
    return results
def _next_turn_no(conn, session_id: int) -> int:
    # MAX(turn_no) + 1 is computed inside the caller's transaction; concurrent
    # inserts into the same session may still race unless the table enforces
    # uniqueness on (session_id, turn_no).
    row = conn.execute(
        text("SELECT COALESCE(MAX(turn_no), 0) + 1 AS next_no FROM chat_turn WHERE session_id=:sid"),
        {"sid": session_id},
    ).first()
    if not row:
        return 1
    return int(row._mapping["next_no"])


def create_chat_turn(session_id: int, payload: ChatTurnCreate) -> Dict[str, Any]:
    """Insert a chat turn and auto-increment turn number within the session."""
    engine = get_engine()
    params = {
        "session_id": session_id,
        "user_id": payload.user_id,
        "user_query": payload.user_query,
        "intent": payload.intent,
        "ast_json": _json_dump(payload.ast_json),
        "generated_sql": payload.generated_sql,
        "sql_status": payload.sql_status,
        "error_msg": payload.error_msg,
        "main_metric_ids": _json_dump(payload.main_metric_ids),
        "created_metric_ids": _json_dump(payload.created_metric_ids),
        "end_time": payload.end_time,
    }
    with engine.begin() as conn:
        turn_no = _next_turn_no(conn, session_id)
        params["turn_no"] = turn_no
        result = conn.execute(
            text(
                """
                INSERT INTO chat_turn (
                    session_id, turn_no, user_id,
                    user_query, intent, ast_json,
                    generated_sql, sql_status, error_msg,
                    main_metric_ids, created_metric_ids,
                    end_time
                )
                VALUES (
                    :session_id, :turn_no, :user_id,
                    :user_query, :intent, :ast_json,
                    :generated_sql, :sql_status, :error_msg,
                    :main_metric_ids, :created_metric_ids,
                    :end_time
                )
                """
            ),
            params,
        )
        turn_id = result.lastrowid
        row = conn.execute(
            text("SELECT * FROM chat_turn WHERE id=:id"), {"id": turn_id}
        ).first()
    if not row:
        raise RuntimeError("Failed to create chat turn.")
    data = _row_to_dict(row)
    _parse_json_fields(data, ["ast_json", "main_metric_ids", "created_metric_ids"])
    return data


def get_chat_turn(turn_id: int) -> Optional[Dict[str, Any]]:
    engine = get_engine()
    with engine.begin() as conn:
        row = conn.execute(
            text("SELECT * FROM chat_turn WHERE id=:id"), {"id": turn_id}
        ).first()
    if not row:
        return None
    data = _row_to_dict(row)
    _parse_json_fields(data, ["ast_json", "main_metric_ids", "created_metric_ids"])
    return data


def list_chat_turns(session_id: int) -> List[Dict[str, Any]]:
    engine = get_engine()
    with engine.begin() as conn:
        rows = conn.execute(
            text(
                "SELECT * FROM chat_turn WHERE session_id=:session_id ORDER BY turn_no ASC"
            ),
            {"session_id": session_id},
        ).fetchall()
    results: List[Dict[str, Any]] = []
    for row in rows:
        data = _row_to_dict(row)
        _parse_json_fields(data, ["ast_json", "main_metric_ids", "created_metric_ids"])
        results.append(data)
    return results


def create_retrievals(turn_id: int, retrievals: List[ChatTurnRetrievalItem]) -> int:
    """Batch insert retrieval records for a turn."""
    if not retrievals:
        return 0
    engine = get_engine()
    params_list = []
    for item in retrievals:
        params_list.append(
            {
                "turn_id": turn_id,
                "item_type": item.item_type,
                "item_id": item.item_id,
                "item_extra": _json_dump(item.item_extra),
                "similarity_score": item.similarity_score,
                "rank_no": item.rank_no,
                "used_in_reasoning": 1 if item.used_in_reasoning else 0,
                "used_in_sql": 1 if item.used_in_sql else 0,
            }
        )
    with engine.begin() as conn:
        conn.execute(
            text(
                """
                INSERT INTO chat_turn_retrieval (
                    turn_id, item_type, item_id, item_extra,
                    similarity_score, rank_no, used_in_reasoning, used_in_sql
                )
                VALUES (
                    :turn_id, :item_type, :item_id, :item_extra,
                    :similarity_score, :rank_no, :used_in_reasoning, :used_in_sql
                )
                """
            ),
            params_list,
        )
    return len(retrievals)


def list_retrievals(turn_id: int) -> List[Dict[str, Any]]:
    engine = get_engine()
    with engine.begin() as conn:
        rows = conn.execute(
            text(
                "SELECT * FROM chat_turn_retrieval WHERE turn_id=:turn_id ORDER BY created_at ASC, rank_no ASC"
            ),
            {"turn_id": turn_id},
        ).fetchall()
    results: List[Dict[str, Any]] = []
    for row in rows:
        data = _row_to_dict(row)
        _parse_json_fields(data, ["item_extra"])
        data["used_in_reasoning"] = bool(data.get("used_in_reasoning"))
        data["used_in_sql"] = bool(data.get("used_in_sql"))
        results.append(data)
    return results
# Metric registry
def _metric_sql_hash(sql_text: str) -> str:
    """Compute a stable hash to detect SQL definition changes."""
    return hashlib.md5(sql_text.encode("utf-8")).hexdigest()


def create_metric(payload: MetricCreate) -> Dict[str, Any]:
    """Insert a new metric definition; version starts at 1."""
    engine = get_engine()
    now = datetime.utcnow()
    sql_hash = _metric_sql_hash(payload.base_sql)
    params = {
        "metric_code": payload.metric_code,
        "metric_name": payload.metric_name,
        "metric_aliases": _json_dump(payload.metric_aliases),
        "biz_domain": payload.biz_domain,
        "biz_desc": payload.biz_desc,
        "chat_turn_id": payload.chat_turn_id,
        "tech_desc": payload.tech_desc,
        "formula_expr": payload.formula_expr,
        "base_sql": payload.base_sql,
        "time_grain": payload.time_grain,
        "dim_binding": _json_dump(payload.dim_binding),
        "update_strategy": payload.update_strategy,
        "schedule_id": payload.schedule_id,
        "schedule_type": payload.schedule_type,
        "version": 1,
        "is_active": 1 if payload.is_active else 0,
        "sql_hash": sql_hash,
        "created_by": payload.created_by,
        "updated_by": payload.updated_by,
        "created_at": now,
        "updated_at": now,
    }
    with engine.begin() as conn:
        result = conn.execute(
            text(
                """
                INSERT INTO metric_def (
                    metric_code, metric_name, metric_aliases, biz_domain, biz_desc,
                    chat_turn_id, tech_desc, formula_expr, base_sql,
                    time_grain, dim_binding, update_strategy,
                    schedule_id, schedule_type, version, is_active,
                    sql_hash, created_by, updated_by, created_at, updated_at
                )
                VALUES (
                    :metric_code, :metric_name, :metric_aliases, :biz_domain, :biz_desc,
                    :chat_turn_id, :tech_desc, :formula_expr, :base_sql,
                    :time_grain, :dim_binding, :update_strategy,
                    :schedule_id, :schedule_type, :version, :is_active,
                    :sql_hash, :created_by, :updated_by, :created_at, :updated_at
                )
                """
            ),
            params,
        )
        metric_id = result.lastrowid
        row = conn.execute(
            text("SELECT * FROM metric_def WHERE id=:id"), {"id": metric_id}
        ).first()
    if not row:
        raise RuntimeError("Failed to create metric definition.")
    data = _row_to_dict(row)
    _parse_json_fields(data, ["metric_aliases", "dim_binding"])
    data["is_active"] = bool(data.get("is_active"))
    return data


def update_metric(metric_id: int, payload: MetricUpdate) -> Dict[str, Any]:
    """Update mutable fields of a metric definition and refresh sql_hash when needed."""
    updates: Dict[str, Any] = {}
    for field in (
        "metric_name",
        "biz_domain",
        "biz_desc",
        "tech_desc",
        "formula_expr",
        "base_sql",
        "time_grain",
        "update_strategy",
        "schedule_id",
        "schedule_type",
        "updated_by",
    ):
        value = getattr(payload, field)
        if value is not None:
            updates[field] = value

    if payload.metric_aliases is not None:
        updates["metric_aliases"] = _json_dump(payload.metric_aliases)
    if payload.dim_binding is not None:
        updates["dim_binding"] = _json_dump(payload.dim_binding)
    if payload.is_active is not None:
        updates["is_active"] = 1 if payload.is_active else 0
    if payload.base_sql is not None:
        updates["sql_hash"] = _metric_sql_hash(payload.base_sql)

    if not updates:
        current = get_metric(metric_id)
        if not current:
            raise KeyError(f"Metric {metric_id} not found.")
        return current

    updates["updated_at"] = datetime.utcnow()
    set_clause = ", ".join(f"{key}=:{key}" for key in updates.keys())
    params = dict(updates)
    params["id"] = metric_id

    engine = get_engine()
    with engine.begin() as conn:
        conn.execute(
            text(f"UPDATE metric_def SET {set_clause} WHERE id=:id"),
            params,
        )
        row = conn.execute(
            text("SELECT * FROM metric_def WHERE id=:id"), {"id": metric_id}
        ).first()
    if not row:
        raise KeyError(f"Metric {metric_id} not found.")
    data = _row_to_dict(row)
    _parse_json_fields(data, ["metric_aliases", "dim_binding"])
    data["is_active"] = bool(data.get("is_active"))
    return data


def get_metric(metric_id: int) -> Optional[Dict[str, Any]]:
    engine = get_engine()
    with engine.begin() as conn:
        row = conn.execute(
            text("SELECT * FROM metric_def WHERE id=:id"), {"id": metric_id}
        ).first()
    if not row:
        return None
    data = _row_to_dict(row)
    _parse_json_fields(data, ["metric_aliases", "dim_binding"])
    data["is_active"] = bool(data.get("is_active"))
    return data


def list_metrics(
    *,
    biz_domain: Optional[str] = None,
    is_active: Optional[bool] = None,
    keyword: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict[str, Any]]:
    """List metric definitions with simple filters and pagination."""
    conditions = []
    params: Dict[str, Any] = {"limit": limit, "offset": offset}
    if biz_domain:
        conditions.append("biz_domain=:biz_domain")
        params["biz_domain"] = biz_domain
    if is_active is not None:
        conditions.append("is_active=:is_active")
        params["is_active"] = 1 if is_active else 0
    if keyword:
        conditions.append("(metric_code LIKE :kw OR metric_name LIKE :kw)")
        params["kw"] = f"%{keyword}%"

    where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
    engine = get_engine()
    with engine.begin() as conn:
        rows = conn.execute(
            text(
                f"SELECT * FROM metric_def {where_clause} "
                "ORDER BY updated_at DESC LIMIT :limit OFFSET :offset"
            ),
            params,
        ).fetchall()
    results: List[Dict[str, Any]] = []
    for row in rows:
        data = _row_to_dict(row)
        _parse_json_fields(data, ["metric_aliases", "dim_binding"])
        data["is_active"] = bool(data.get("is_active"))
        results.append(data)
    return results
# Metric schedules
def create_metric_schedule(payload: MetricScheduleCreate) -> Dict[str, Any]:
    """Create a schedule record for a metric."""
    engine = get_engine()
    params = {
        "metric_id": payload.metric_id,
        "cron_expr": payload.cron_expr,
        "enabled": 1 if payload.enabled else 0,
        "priority": payload.priority,
        "backfill_allowed": 1 if payload.backfill_allowed else 0,
        "max_runtime_sec": payload.max_runtime_sec,
        "retry_times": payload.retry_times,
        "owner_team": payload.owner_team,
        "owner_user_id": payload.owner_user_id,
    }
    with engine.begin() as conn:
        result = conn.execute(
            text(
                """
                INSERT INTO metric_schedule (
                    metric_id, cron_expr, enabled, priority,
                    backfill_allowed, max_runtime_sec, retry_times,
                    owner_team, owner_user_id
                ) VALUES (
                    :metric_id, :cron_expr, :enabled, :priority,
                    :backfill_allowed, :max_runtime_sec, :retry_times,
                    :owner_team, :owner_user_id
                )
                """
            ),
            params,
        )
        schedule_id = result.lastrowid
        row = conn.execute(
            text("SELECT * FROM metric_schedule WHERE id=:id"), {"id": schedule_id}
        ).first()
    if not row:
        raise RuntimeError("Failed to create metric schedule.")
    data = _row_to_dict(row)
    data["enabled"] = bool(data.get("enabled"))
    data["backfill_allowed"] = bool(data.get("backfill_allowed"))
    return data


def update_metric_schedule(schedule_id: int, payload: MetricScheduleUpdate) -> Dict[str, Any]:
    updates: Dict[str, Any] = {}
    for field in (
        "cron_expr",
        "priority",
        "max_runtime_sec",
        "retry_times",
        "owner_team",
        "owner_user_id",
    ):
        value = getattr(payload, field)
        if value is not None:
            updates[field] = value
    if payload.enabled is not None:
        updates["enabled"] = 1 if payload.enabled else 0
    if payload.backfill_allowed is not None:
        updates["backfill_allowed"] = 1 if payload.backfill_allowed else 0

    if not updates:
        current = list_schedules_for_metric(schedule_id=schedule_id)
        if current:
            return current[0]
        raise KeyError(f"Schedule {schedule_id} not found.")

    set_clause = ", ".join(f"{key}=:{key}" for key in updates.keys())
    params = dict(updates)
    params["id"] = schedule_id

    engine = get_engine()
    with engine.begin() as conn:
        conn.execute(
            text(f"UPDATE metric_schedule SET {set_clause} WHERE id=:id"),
            params,
        )
        row = conn.execute(
            text("SELECT * FROM metric_schedule WHERE id=:id"), {"id": schedule_id}
        ).first()
    if not row:
        raise KeyError(f"Schedule {schedule_id} not found.")
    data = _row_to_dict(row)
    data["enabled"] = bool(data.get("enabled"))
    data["backfill_allowed"] = bool(data.get("backfill_allowed"))
    return data


def list_schedules_for_metric(metric_id: Optional[int] = None, schedule_id: Optional[int] = None) -> List[Dict[str, Any]]:
    conditions = []
    params: Dict[str, Any] = {}
    if metric_id is not None:
        conditions.append("metric_id=:metric_id")
        params["metric_id"] = metric_id
    if schedule_id is not None:
        conditions.append("id=:id")
        params["id"] = schedule_id
    where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
    engine = get_engine()
    with engine.begin() as conn:
        rows = conn.execute(
            text(f"SELECT * FROM metric_schedule {where_clause} ORDER BY id DESC"),
            params,
        ).fetchall()
    results: List[Dict[str, Any]] = []
    for row in rows:
        data = _row_to_dict(row)
        data["enabled"] = bool(data.get("enabled"))
        data["backfill_allowed"] = bool(data.get("backfill_allowed"))
        results.append(data)
    return results
# Metric runs
def trigger_metric_run(payload: MetricRunTrigger) -> Dict[str, Any]:
    """Create a metric_job_run entry; execution is orchestrated elsewhere."""
    metric = get_metric(payload.metric_id)
    if not metric:
        raise KeyError(f"Metric {payload.metric_id} not found.")
    metric_version = payload.metric_version or metric.get("version", 1)
    base_sql_snapshot = payload.base_sql_snapshot or metric.get("base_sql")
    triggered_at = payload.triggered_at or datetime.utcnow()

    params = {
        "metric_id": payload.metric_id,
        "schedule_id": payload.schedule_id,
        "source_turn_id": payload.source_turn_id,
        "data_time_from": payload.data_time_from,
        "data_time_to": payload.data_time_to,
        "metric_version": metric_version,
        "base_sql_snapshot": base_sql_snapshot,
        "status": "RUNNING",
        "error_msg": None,
        "affected_rows": None,
        "runtime_ms": None,
        "triggered_by": payload.triggered_by,
        "triggered_at": triggered_at,
        "started_at": None,
        "finished_at": None,
    }
    engine = get_engine()
    with engine.begin() as conn:
        result = conn.execute(
            text(
                """
                INSERT INTO metric_job_run (
                    metric_id, schedule_id, source_turn_id,
                    data_time_from, data_time_to, metric_version,
                    base_sql_snapshot, status, error_msg,
                    affected_rows, runtime_ms,
                    triggered_by, triggered_at, started_at, finished_at
                ) VALUES (
                    :metric_id, :schedule_id, :source_turn_id,
                    :data_time_from, :data_time_to, :metric_version,
                    :base_sql_snapshot, :status, :error_msg,
                    :affected_rows, :runtime_ms,
                    :triggered_by, :triggered_at, :started_at, :finished_at
                )
                """
            ),
            params,
        )
        run_id = result.lastrowid
        row = conn.execute(
            text("SELECT * FROM metric_job_run WHERE id=:id"), {"id": run_id}
        ).first()
    if not row:
        raise RuntimeError("Failed to create metric job run.")
    return _row_to_dict(row)


def get_metric_run(run_id: int) -> Optional[Dict[str, Any]]:
    engine = get_engine()
    with engine.begin() as conn:
        row = conn.execute(
            text("SELECT * FROM metric_job_run WHERE id=:id"), {"id": run_id}
        ).first()
    if not row:
        return None
    return _row_to_dict(row)


def list_metric_runs(
    *,
    metric_id: Optional[int] = None,
    status: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict[str, Any]]:
    conditions = []
    params: Dict[str, Any] = {"limit": limit, "offset": offset}
    if metric_id is not None:
        conditions.append("metric_id=:metric_id")
        params["metric_id"] = metric_id
    if status is not None:
        conditions.append("status=:status")
        params["status"] = status
    where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
    engine = get_engine()
    with engine.begin() as conn:
        rows = conn.execute(
            text(
                f"SELECT * FROM metric_job_run {where_clause} "
                "ORDER BY triggered_at DESC LIMIT :limit OFFSET :offset"
            ),
            params,
        ).fetchall()
    return [_row_to_dict(row) for row in rows]
# Metric results
def write_metric_results(payload: MetricResultsWriteRequest) -> int:
    """Bulk insert metric_result rows for a metric/version."""
    metric = get_metric(payload.metric_id)
    if not metric:
        raise KeyError(f"Metric {payload.metric_id} not found.")
    default_version = metric.get("version", 1)
    now = datetime.utcnow()
    rows: List[Dict[str, Any]] = []
    for item in payload.results:
        rows.append(
            {
                "metric_id": payload.metric_id,
                "metric_version": item.metric_version or default_version,
                "stat_time": item.stat_time,
                "extra_dims": _json_dump(item.extra_dims),
                "metric_value": item.metric_value,
                "load_time": item.load_time or now,
                "data_version": item.data_version,
            }
        )
    if not rows:
        return 0
    engine = get_engine()
    with engine.begin() as conn:
        conn.execute(
            text(
                """
                INSERT INTO metric_result (
                    metric_id, metric_version, stat_time,
                    extra_dims, metric_value, load_time, data_version
                ) VALUES (
                    :metric_id, :metric_version, :stat_time,
                    :extra_dims, :metric_value, :load_time, :data_version
                )
                """
            ),
            rows,
        )
    return len(rows)


def query_metric_results(
    *,
    metric_id: int,
    stat_from: Optional[datetime] = None,
    stat_to: Optional[datetime] = None,
    limit: int = 200,
    offset: int = 0,
) -> List[Dict[str, Any]]:
    conditions = ["metric_id=:metric_id"]
    params: Dict[str, Any] = {
        "metric_id": metric_id,
        "limit": limit,
        "offset": offset,
    }
    if stat_from is not None:
        conditions.append("stat_time>=:stat_from")
        params["stat_from"] = stat_from
    if stat_to is not None:
        conditions.append("stat_time<=:stat_to")
        params["stat_to"] = stat_to

    where_clause = "WHERE " + " AND ".join(conditions)
    engine = get_engine()
    with engine.begin() as conn:
        rows = conn.execute(
            text(
                f"SELECT * FROM metric_result {where_clause} "
                "ORDER BY stat_time DESC LIMIT :limit OFFSET :offset"
            ),
            params,
        ).fetchall()
    results: List[Dict[str, Any]] = []
    for row in rows:
        data = _row_to_dict(row)
        _parse_json_fields(data, ["extra_dims"])
        results.append(data)
    return results


def latest_metric_result(metric_id: int) -> Optional[Dict[str, Any]]:
    engine = get_engine()
    with engine.begin() as conn:
        row = conn.execute(
            text(
                """
                SELECT * FROM metric_result
                WHERE metric_id=:metric_id
                ORDER BY stat_time DESC
                LIMIT 1
                """
            ),
            {"metric_id": metric_id},
        ).first()
    if not row:
        return None
    data = _row_to_dict(row)
    _parse_json_fields(data, ["extra_dims"])
    return data
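For reference, a minimal sketch of how the session and metric helpers above compose. It assumes the `ChatSessionCreate`/`ChatTurnCreate` payload models live in `app.models` and accept the fields referenced in the INSERT parameters; neither is shown in this diff.

```python
# Hypothetical wiring of the deleted store helpers; the import path and
# constructor fields are assumptions based on the INSERT params above.
from app.models import ChatSessionCreate, ChatTurnCreate

session = create_chat_session(ChatSessionCreate(user_id=1))
turn = create_chat_turn(
    session["id"],
    ChatTurnCreate(user_id=1, user_query="total orders last week", intent="query"),
)
print(list_chat_turns(session["id"]))
close_chat_session(session["id"])  # sets status=CLOSED and an end_time
```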
@ -1,83 +0,0 @@
from __future__ import annotations

import logging
from typing import Any, Sequence

import httpx

from app.exceptions import ProviderAPICallError
from app.schemas.rag import RagDeleteRequest, RagItemPayload, RagRetrieveRequest
from app.settings import RAG_API_AUTH_TOKEN, RAG_API_BASE_URL


logger = logging.getLogger(__name__)


class RagAPIClient:
    """Thin async client wrapper around the RAG endpoints described in doc/rag-api.md."""

    def __init__(self, *, base_url: str | None = None, auth_token: str | None = None) -> None:
        resolved_base = base_url or RAG_API_BASE_URL
        self._base_url = resolved_base.rstrip("/")
        self._auth_token = auth_token or RAG_API_AUTH_TOKEN

    def _headers(self) -> dict[str, str]:
        headers = {"Content-Type": "application/json"}
        if self._auth_token:
            headers["Authorization"] = f"Bearer {self._auth_token}"
        return headers

    async def _post(
        self,
        client: httpx.AsyncClient,
        path: str,
        payload: Any,
    ) -> Any:
        url = f"{self._base_url}{path}"
        try:
            response = await client.post(url, json=payload, headers=self._headers())
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            status_code = exc.response.status_code if exc.response else None
            response_text = exc.response.text if exc.response else ""
            logger.error(
                "RAG API responded with an error (%s) for %s: %s",
                status_code,
                url,
                response_text,
                exc_info=True,
            )
            raise ProviderAPICallError(
                "RAG API call failed.",
                status_code=status_code,
                response_text=response_text,
            ) from exc
        except httpx.HTTPError as exc:
            logger.error("Transport error calling RAG API %s: %s", url, exc, exc_info=True)
            raise ProviderAPICallError(f"RAG API call failed: {exc}") from exc

        try:
            return response.json()
        except ValueError:
            logger.warning("RAG API returned non-JSON response for %s; returning raw text.", url)
            return {"raw": response.text}

    async def add(self, client: httpx.AsyncClient, payload: RagItemPayload) -> Any:
        body = payload.model_dump(by_alias=True, exclude_none=True)
        return await self._post(client, "/rag/add", body)

    async def add_batch(self, client: httpx.AsyncClient, items: Sequence[RagItemPayload]) -> Any:
        body = [item.model_dump(by_alias=True, exclude_none=True) for item in items]
        return await self._post(client, "/rag/addBatch", body)

    async def update(self, client: httpx.AsyncClient, payload: RagItemPayload) -> Any:
        body = payload.model_dump(by_alias=True, exclude_none=True)
        return await self._post(client, "/rag/update", body)

    async def delete(self, client: httpx.AsyncClient, payload: RagDeleteRequest) -> Any:
        body = payload.model_dump(by_alias=True, exclude_none=True)
        return await self._post(client, "/rag/delete", body)

    async def retrieve(self, client: httpx.AsyncClient, payload: RagRetrieveRequest) -> Any:
        body = payload.model_dump(by_alias=True, exclude_none=True)
        return await self._post(client, "/rag/retrieve", body)
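A minimal usage sketch for the client above; the exact fields of `RagRetrieveRequest` are defined in `app/schemas/rag.py`, so the `query` field here is an assumption.

```python
# Sketch only: RagRetrieveRequest(query=...) is an assumed constructor shape.
import asyncio

import httpx


async def demo() -> None:
    rag = RagAPIClient()
    async with httpx.AsyncClient(timeout=30) as client:
        hits = await rag.retrieve(client, RagRetrieveRequest(query="daily active users"))
        print(hits)


asyncio.run(demo())
```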
@ -1,857 +0,0 @@
from __future__ import annotations

import asyncio
import json
import logging
import os
import re
from datetime import date, datetime
from dataclasses import asdict, dataclass, is_dataclass
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import httpx
import great_expectations as gx
from great_expectations.core.batch import RuntimeBatchRequest
from great_expectations.core.expectation_suite import ExpectationSuite
from great_expectations.data_context import AbstractDataContext
from great_expectations.exceptions import DataContextError, MetricResolutionError

from app.exceptions import ProviderAPICallError
from app.models import TableProfilingJobRequest
from app.services import LLMGateway
from app.settings import DEFAULT_IMPORT_MODEL
from app.services.import_analysis import (
    IMPORT_GATEWAY_BASE_URL,
    resolve_provider_from_model,
)
from app.utils.llm_usage import extract_usage as extract_llm_usage


logger = logging.getLogger(__name__)


GE_REPORT_RELATIVE_PATH = Path("uncommitted") / "data_docs" / "local_site" / "index.html"
PROMPT_FILENAMES = {
    "ge_result_desc": "ge_result_desc_prompt.md",
    "snippet_generator": "snippet_generator.md",
    "snippet_alias": "snippet_alias_generator.md",
}
DEFAULT_CHAT_TIMEOUT_SECONDS = 180.0


@dataclass
class GEProfilingArtifacts:
    profiling_result: Dict[str, Any]
    profiling_summary: Dict[str, Any]
    docs_path: str


@dataclass
class LLMCallResult:
    data: Any
    usage: Optional[Dict[str, Any]] = None


class PipelineActionType:
    GE_PROFILING = "ge_profiling"
    GE_RESULT_DESC = "ge_result_desc"
    SNIPPET = "snippet"
    SNIPPET_ALIAS = "snippet_alias"


def _project_root() -> Path:
    return Path(__file__).resolve().parents[2]


def _prompt_dir() -> Path:
    return _project_root() / "prompt"


@lru_cache(maxsize=None)
def _load_prompt_parts(filename: str) -> Tuple[str, str]:
    prompt_path = _prompt_dir() / filename
    if not prompt_path.exists():
        raise FileNotFoundError(f"Prompt template not found: {prompt_path}")

    raw = prompt_path.read_text(encoding="utf-8")
    splitter = "用户消息(User)"
    if splitter not in raw:
        raise ValueError(f"Prompt template '{filename}' missing separator '{splitter}'.")

    system_raw, user_raw = raw.split(splitter, maxsplit=1)
    system_text = system_raw.replace("系统角色(System)", "").strip()
    user_text = user_raw.strip()
    return system_text, user_text


def _render_prompt(template_key: str, replacements: Dict[str, str]) -> Tuple[str, str]:
    filename = PROMPT_FILENAMES[template_key]
    system_text, user_template = _load_prompt_parts(filename)

    rendered_user = user_template
    for key, value in replacements.items():
        rendered_user = rendered_user.replace(key, value)

    return system_text, rendered_user


def _extract_timeout_seconds(options: Optional[Dict[str, Any]]) -> Optional[float]:
    if not options:
        return None
    value = options.get("llm_timeout_seconds")
    if value is None:
        return None
    try:
        timeout = float(value)
        if timeout <= 0:
            raise ValueError
        return timeout
    except (TypeError, ValueError):
        logger.warning(
            "Invalid llm_timeout_seconds value in extra_options: %r. Falling back to default.",
            value,
        )
        return DEFAULT_CHAT_TIMEOUT_SECONDS


def _extract_json_payload(content: str) -> str:
    fenced = re.search(
        r"```(?:json)?\s*([\s\S]+?)```",
        content,
        flags=re.IGNORECASE,
    )
    if fenced:
        snippet = fenced.group(1).strip()
        if snippet:
            return snippet

    stripped = content.strip()
    if not stripped:
        raise ValueError("Empty LLM content.")

    decoder = json.JSONDecoder()
    for idx, char in enumerate(stripped):
        if char not in {"{", "["}:
            continue
        try:
            _, end = decoder.raw_decode(stripped[idx:])
        except json.JSONDecodeError:
            continue
        candidate = stripped[idx : idx + end].strip()
        if candidate:
            return candidate

    return stripped


def _parse_completion_payload(response_payload: Dict[str, Any]) -> Any:
    choices = response_payload.get("choices") or []
    if not choices:
        raise ProviderAPICallError("LLM response did not contain choices to parse.")
    message = choices[0].get("message") or {}
    content = message.get("content") or ""
    if not content.strip():
        raise ProviderAPICallError("LLM response content is empty.")
    json_payload = _extract_json_payload(content)
    try:
        return json.loads(json_payload)
    except json.JSONDecodeError as exc:
        preview = json_payload[:800]
        logger.error("Failed to parse JSON from LLM response: %s", preview, exc_info=True)
        raise ProviderAPICallError("LLM response JSON parsing failed.") from exc
async def _post_callback(callback_url: str, payload: Dict[str, Any], client: httpx.AsyncClient) -> None:
    safe_payload = _normalize_for_json(payload)
    try:
        logger.info(
            "Posting pipeline action callback to %s: %s",
            callback_url,
            json.dumps(safe_payload, ensure_ascii=False),
        )
        response = await client.post(callback_url, json=safe_payload)
        response.raise_for_status()
    except httpx.HTTPError as exc:
        logger.error("Callback delivery to %s failed: %s", callback_url, exc, exc_info=True)


def _sanitize_value_set(value: Any, max_values: int) -> Tuple[Any, Optional[Dict[str, int]]]:
    if not isinstance(value, list):
        return value, None
    original_len = len(value)
    if original_len <= max_values:
        return value, None
    trimmed = value[:max_values]
    return trimmed, {"original_length": original_len, "retained": max_values}


def _sanitize_expectation_suite(suite: ExpectationSuite, max_value_set_values: int = 100) -> Dict[str, Any]:
    suite_dict = suite.to_json_dict()
    remarks: List[Dict[str, Any]] = []

    for expectation in suite_dict.get("expectations", []):
        kwargs = expectation.get("kwargs", {})
        if "value_set" in kwargs:
            sanitized_value, note = _sanitize_value_set(kwargs["value_set"], max_value_set_values)
            kwargs["value_set"] = sanitized_value
            if note:
                expectation.setdefault("meta", {})
                expectation["meta"]["value_set_truncated"] = note
                remarks.append(
                    {
                        "column": kwargs.get("column"),
                        "expectation": expectation.get("expectation_type"),
                        "note": note,
                    }
                )

    if remarks:
        suite_dict.setdefault("meta", {})
        suite_dict["meta"]["value_set_truncations"] = remarks

    return suite_dict


def _summarize_expectation_suite(suite_dict: Dict[str, Any]) -> Dict[str, Any]:
    column_map: Dict[str, Dict[str, Any]] = {}
    table_expectations: List[Dict[str, Any]] = []

    for expectation in suite_dict.get("expectations", []):
        expectation_type = expectation.get("expectation_type")
        kwargs = expectation.get("kwargs", {})
        column = kwargs.get("column")
        summary_entry: Dict[str, Any] = {"expectation": expectation_type}

        if "value_set" in kwargs and isinstance(kwargs["value_set"], list):
            summary_entry["value_set_size"] = len(kwargs["value_set"])
            summary_entry["value_set_preview"] = kwargs["value_set"][:5]

        if column:
            column_entry = column_map.setdefault(
                column,
                {"name": column, "expectations": []},
            )
            column_entry["expectations"].append(summary_entry)
        else:
            table_expectations.append(summary_entry)

    summary = {
        "column_profiles": list(column_map.values()),
        "table_level_expectations": table_expectations,
        "total_expectations": len(suite_dict.get("expectations", [])),
    }
    return summary


def _sanitize_identifier(raw: Optional[str], fallback: str) -> str:
    if not raw:
        return fallback
    candidate = re.sub(r"[^0-9A-Za-z_]+", "_", raw).strip("_")
    return candidate or fallback


def _format_connection_string(template: str, access_info: Dict[str, Any]) -> str:
    if not access_info:
        return template
    try:
        return template.format_map({k: v for k, v in access_info.items()})
    except KeyError as exc:
        missing = exc.args[0]
        raise ValueError(f"table_access_info missing key '{missing}' required by connection_string.") from exc
def _ensure_sql_runtime_datasource(
    context: AbstractDataContext,
    datasource_name: str,
    connection_string: str,
) -> None:
    try:
        datasource = context.get_datasource(datasource_name)
    except (DataContextError, ValueError) as exc:
        message = str(exc)
        if "Could not find a datasource" in message or "Unable to load datasource" in message:
            datasource = None
        else:  # pragma: no cover - defensive
            raise RuntimeError(f"Failed to inspect datasource '{datasource_name}'.") from exc
    except Exception as exc:  # pragma: no cover - defensive
        raise RuntimeError(f"Failed to inspect datasource '{datasource_name}'.") from exc

    if datasource is not None:
        execution_engine = getattr(datasource, "execution_engine", None)
        current_conn = getattr(execution_engine, "connection_string", None)
        if current_conn and current_conn != connection_string:
            logger.info(
                "Existing datasource %s uses different connection string; creating dedicated runtime datasource.",
                datasource_name,
            )
            try:
                context.delete_datasource(datasource_name)
            except Exception as exc:  # pragma: no cover - defensive
                logger.warning(
                    "Failed to delete datasource %s before recreation: %s",
                    datasource_name,
                    exc,
                )
            else:
                datasource = None

    if datasource is not None:
        return

    runtime_datasource_config = {
        "name": datasource_name,
        "class_name": "Datasource",
        "execution_engine": {
            "class_name": "SqlAlchemyExecutionEngine",
            "connection_string": connection_string,
        },
        "data_connectors": {
            "runtime_connector": {
                "class_name": "RuntimeDataConnector",
                "batch_identifiers": ["default_identifier_name"],
            }
        },
    }
    try:
        context.add_datasource(**runtime_datasource_config)
    except Exception as exc:  # pragma: no cover - defensive
        raise RuntimeError(f"Failed to create runtime datasource '{datasource_name}'.") from exc


def _build_sql_runtime_batch_request(
    context: AbstractDataContext,
    request: TableProfilingJobRequest,
) -> RuntimeBatchRequest:
    link_info = request.table_link_info or {}
    access_info = request.table_access_info or {}

    connection_template = link_info.get("connection_string")
    if not connection_template:
        raise ValueError("table_link_info.connection_string is required when using table_link_info.")

    connection_string = _format_connection_string(connection_template, access_info)

    source_type = (link_info.get("type") or "sql").lower()
    if source_type != "sql":
        raise ValueError(f"Unsupported table_link_info.type='{source_type}'. Only 'sql' is supported.")

    query = link_info.get("query")
    table_name = link_info.get("table") or link_info.get("table_name")
    schema_name = link_info.get("schema")

    if not query and not table_name:
        raise ValueError("Either table_link_info.query or table_link_info.table must be provided.")

    if not query:
        if not table_name:
            raise ValueError("table_link_info.table must be provided when query is omitted.")

        identifier = re.compile(r"^[A-Za-z_][A-Za-z0-9_$]*$")

        def _quote(name: str) -> str:
            if identifier.match(name):
                return name
            return f"`{name.replace('`', '``')}`"

        if schema_name:
            schema_part = schema_name if "." not in schema_name else schema_name.split(".")[-1]
            table_part = table_name if "." not in table_name else table_name.split(".")[-1]
            qualified_table = f"{_quote(schema_part)}.{_quote(table_part)}"
        else:
            qualified_table = _quote(table_name)

        query = f"SELECT * FROM {qualified_table}"
        limit = link_info.get("limit")
        if isinstance(limit, int) and limit > 0:
            query = f"{query} LIMIT {limit}"

    datasource_name = request.ge_datasource_name or _sanitize_identifier(
        f"{request.table_id}_runtime_ds", "runtime_ds"
    )
    data_asset_name = request.ge_data_asset_name or _sanitize_identifier(
        table_name or "runtime_query", "runtime_query"
    )

    _ensure_sql_runtime_datasource(context, datasource_name, connection_string)

    batch_identifiers = {
        "default_identifier_name": f"{request.table_id}:{request.version_ts}",
    }

    return RuntimeBatchRequest(
        datasource_name=datasource_name,
        data_connector_name="runtime_connector",
        data_asset_name=data_asset_name,
        runtime_parameters={"query": query},
        batch_identifiers=batch_identifiers,
    )
def _run_onboarding_assistant(
    context: AbstractDataContext,
    batch_request: Any,
    suite_name: str,
) -> Tuple[ExpectationSuite, Any]:
    assistant = context.assistants.onboarding
    assistant_result = assistant.run(batch_request=batch_request)
    suite = assistant_result.get_expectation_suite(expectation_suite_name=suite_name)
    context.save_expectation_suite(suite, expectation_suite_name=suite_name)
    validation_getter = getattr(assistant_result, "get_validation_result", None)
    if callable(validation_getter):
        validation_result = validation_getter()
    else:
        validation_result = getattr(assistant_result, "validation_result", None)
    if validation_result is None:
        # Fallback: rerun validation using the freshly generated expectation suite.
        validator = context.get_validator(
            batch_request=batch_request,
            expectation_suite_name=suite_name,
        )
        validation_result = validator.validate()
    return suite, validation_result


def _resolve_context(request: TableProfilingJobRequest) -> AbstractDataContext:
    context_kwargs: Dict[str, Any] = {}
    if request.ge_data_context_root:
        context_kwargs["project_root_dir"] = request.ge_data_context_root
    elif os.environ.get("GE_DATA_CONTEXT_ROOT"):
        context_kwargs["project_root_dir"] = os.environ["GE_DATA_CONTEXT_ROOT"]
    else:
        context_kwargs["project_root_dir"] = str(_project_root())

    return gx.get_context(**context_kwargs)


def _build_batch_request(
    context: AbstractDataContext,
    request: TableProfilingJobRequest,
) -> Any:
    if request.ge_batch_request:
        from great_expectations.core.batch import BatchRequest

        return BatchRequest(**request.ge_batch_request)

    if request.table_link_info:
        return _build_sql_runtime_batch_request(context, request)

    if not request.ge_datasource_name or not request.ge_data_asset_name:
        raise ValueError(
            "ge_batch_request or (ge_datasource_name and ge_data_asset_name) must be provided."
        )

    datasource = context.get_datasource(request.ge_datasource_name)
    data_asset = datasource.get_asset(request.ge_data_asset_name)
    return data_asset.build_batch_request()


async def _run_ge_profiling(request: TableProfilingJobRequest) -> GEProfilingArtifacts:
    def _execute() -> GEProfilingArtifacts:
        context = _resolve_context(request)
        suite_name = (
            request.ge_expectation_suite_name
            or f"{request.table_id}_profiling"
        )

        batch_request = _build_batch_request(context, request)
        try:
            context.get_expectation_suite(suite_name)
        except DataContextError:
            context.add_expectation_suite(suite_name)

        validator = context.get_validator(
            batch_request=batch_request,
            expectation_suite_name=suite_name,
        )

        profiler_type = (request.ge_profiler_type or "user_configurable").lower()

        if profiler_type == "data_assistant":
            suite, validation_result = _run_onboarding_assistant(
                context,
                batch_request,
                suite_name,
            )
        else:
            try:
                from great_expectations.profile.user_configurable_profiler import (
                    UserConfigurableProfiler,
                )
            except ImportError as err:  # pragma: no cover - dependency guard
                raise RuntimeError(
                    "UserConfigurableProfiler is unavailable; install great_expectations profiling extra or switch profiler."
                ) from err

            profiler = UserConfigurableProfiler(profile_dataset=validator)
            try:
                suite = profiler.build_suite()
                context.save_expectation_suite(suite, expectation_suite_name=suite_name)
                validator.expectation_suite = suite
                validation_result = validator.validate()
            except MetricResolutionError as exc:
                logger.warning(
                    "UserConfigurableProfiler failed (%s); falling back to data assistant profiling.",
                    exc,
                )
                suite, validation_result = _run_onboarding_assistant(
                    context,
                    batch_request,
                    suite_name,
                )

        sanitized_suite = _sanitize_expectation_suite(suite)
        summary = _summarize_expectation_suite(sanitized_suite)
        validation_dict = validation_result.to_json_dict()

        context.build_data_docs()
        docs_path = Path(context.root_directory) / GE_REPORT_RELATIVE_PATH

        profiling_result = {
            "expectation_suite": sanitized_suite,
            "validation_result": validation_dict,
            "batch_request": getattr(batch_request, "to_json_dict", lambda: None)() or getattr(batch_request, "dict", lambda: None)(),
        }

        return GEProfilingArtifacts(
            profiling_result=profiling_result,
            profiling_summary=summary,
            docs_path=str(docs_path),
        )

    return await asyncio.to_thread(_execute)
async def _call_chat_completions(
    *,
    model_spec: str,
    system_prompt: str,
    user_prompt: str,
    client: httpx.AsyncClient,
    temperature: float = 0.2,
    timeout_seconds: Optional[float] = None,
) -> LLMCallResult:
    provider, model_name = resolve_provider_from_model(model_spec)
    payload = {
        "provider": provider.value,
        "model": model_name,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": temperature,
    }
    payload_size_bytes = len(json.dumps(payload, ensure_ascii=False).encode("utf-8"))

    url = f"{IMPORT_GATEWAY_BASE_URL.rstrip('/')}/v1/chat/completions"
    try:
        # Log the full request so failed calls can be reproduced from the logs.
        logger.info(
            "Calling chat completions API %s with model %s and size %s and payload %s",
            url,
            model_name,
            payload_size_bytes,
            payload,
        )
        response = await client.post(url, json=payload, timeout=timeout_seconds)
        response.raise_for_status()
    except httpx.HTTPError as exc:
        error_name = exc.__class__.__name__
        detail = str(exc).strip()
        if detail:
            message = f"Chat completions request failed ({error_name}): {detail}"
        else:
            message = f"Chat completions request failed ({error_name})."
        raise ProviderAPICallError(message) from exc

    try:
        response_payload = response.json()
    except ValueError as exc:
        raise ProviderAPICallError("Chat completions response was not valid JSON.") from exc

    parsed_payload = _parse_completion_payload(response_payload)
    usage_info = extract_llm_usage(response_payload)
    return LLMCallResult(data=parsed_payload, usage=usage_info)


def _normalize_for_json(value: Any) -> Any:
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, (datetime, date)):
        return str(value)
    if hasattr(value, "model_dump"):
        try:
            return value.model_dump()
        except Exception:  # pragma: no cover - defensive
            pass
    if is_dataclass(value):
        return asdict(value)
    if isinstance(value, dict):
        return {k: _normalize_for_json(v) for k, v in value.items()}
    if isinstance(value, (list, tuple, set)):
        return [_normalize_for_json(v) for v in value]
    if hasattr(value, "to_json_dict"):
        try:
            return value.to_json_dict()
        except Exception:  # pragma: no cover - defensive
            pass
    if hasattr(value, "__dict__"):
        return _normalize_for_json(value.__dict__)
    return repr(value)


def _json_dumps(data: Any) -> str:
    normalised = _normalize_for_json(data)
    return json.dumps(normalised, ensure_ascii=False, indent=2)


def _preview_for_log(data: Any) -> str:
    try:
        serialised = _json_dumps(data)
    except Exception:
        serialised = repr(data)

    return serialised


def _profiling_request_for_log(request: TableProfilingJobRequest) -> Dict[str, Any]:
    payload = request.model_dump()
    access_info = payload.get("table_access_info")
    if isinstance(access_info, dict):
        payload["table_access_info"] = {key: "***" for key in access_info.keys()}
    return payload
async def _execute_result_desc(
    profiling_json: Dict[str, Any],
    _request: TableProfilingJobRequest,
    llm_model: str,
    client: httpx.AsyncClient,
    timeout_seconds: Optional[float],
) -> LLMCallResult:
    system_prompt, user_prompt = _render_prompt(
        "ge_result_desc",
        {"{{GE_RESULT_JSON}}": _json_dumps(profiling_json)},
    )
    llm_output = await _call_chat_completions(
        model_spec=llm_model,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        client=client,
        timeout_seconds=timeout_seconds,
    )
    if not isinstance(llm_output.data, dict):
        raise ProviderAPICallError("GE result description payload must be a JSON object.")
    return llm_output


async def _execute_snippet_generation(
    table_desc_json: Dict[str, Any],
    _request: TableProfilingJobRequest,
    llm_model: str,
    client: httpx.AsyncClient,
    timeout_seconds: Optional[float],
) -> LLMCallResult:
    system_prompt, user_prompt = _render_prompt(
        "snippet_generator",
        {"{{TABLE_PROFILE_JSON}}": _json_dumps(table_desc_json)},
    )
    llm_output = await _call_chat_completions(
        model_spec=llm_model,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        client=client,
        timeout_seconds=timeout_seconds,
    )
    if not isinstance(llm_output.data, list):
        raise ProviderAPICallError("Snippet generator must return a JSON array.")
    return llm_output


async def _execute_snippet_alias(
    snippets_json: List[Dict[str, Any]],
    _request: TableProfilingJobRequest,
    llm_model: str,
    client: httpx.AsyncClient,
    timeout_seconds: Optional[float],
) -> LLMCallResult:
    system_prompt, user_prompt = _render_prompt(
        "snippet_alias",
        {"{{SNIPPET_ARRAY}}": _json_dumps(snippets_json)},
    )
    llm_output = await _call_chat_completions(
        model_spec=llm_model,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        client=client,
        timeout_seconds=timeout_seconds,
    )
    if not isinstance(llm_output.data, list):
        raise ProviderAPICallError("Snippet alias generator must return a JSON array.")
    return llm_output
async def _run_action_with_callback(
|
||||
*,
|
||||
action_type: str,
|
||||
runner,
|
||||
callback_base: Dict[str, Any],
|
||||
client: httpx.AsyncClient,
|
||||
callback_url: str,
|
||||
input_payload: Any = None,
|
||||
model_spec: Optional[str] = None,
|
||||
) -> Any:
|
||||
if input_payload is not None:
|
||||
logger.info(
|
||||
"Pipeline action %s input: %s",
|
||||
action_type,
|
||||
_preview_for_log(input_payload),
|
||||
)
|
||||
try:
|
||||
result = await runner()
|
||||
except Exception as exc:
|
||||
failure_payload = dict(callback_base)
|
||||
failure_payload.update(
|
||||
{
|
||||
"status": "failed",
|
||||
"action_type": action_type,
|
||||
"error": str(exc),
|
||||
}
|
||||
)
|
||||
if model_spec is not None:
|
||||
failure_payload["model"] = model_spec
|
||||
await _post_callback(callback_url, failure_payload, client)
|
||||
raise
|
||||
|
||||
usage_info: Optional[Dict[str, Any]] = None
|
||||
result_payload = result
|
||||
if isinstance(result, LLMCallResult):
|
||||
usage_info = result.usage
|
||||
result_payload = result.data
|
||||
|
||||
success_payload = dict(callback_base)
|
||||
success_payload.update(
|
||||
{
|
||||
"status": "success",
|
||||
"action_type": action_type,
|
||||
}
|
||||
)
|
||||
if model_spec is not None:
|
||||
success_payload["model"] = model_spec
|
||||
|
||||
logger.info(
|
||||
"Pipeline action %s output: %s",
|
||||
action_type,
|
||||
_preview_for_log(result_payload),
|
||||
)
|
||||
|
||||
if action_type == PipelineActionType.GE_PROFILING:
|
||||
artifacts: GEProfilingArtifacts = result_payload
|
||||
success_payload["ge_profiling_json"] = artifacts.profiling_result
|
||||
success_payload["ge_profiling_summary"] = artifacts.profiling_summary
|
||||
success_payload["ge_report_path"] = artifacts.docs_path
|
    elif action_type == PipelineActionType.GE_RESULT_DESC:
        success_payload["ge_result_desc_json"] = result_payload
    elif action_type == PipelineActionType.SNIPPET:
        success_payload["snippet_json"] = result_payload
    elif action_type == PipelineActionType.SNIPPET_ALIAS:
        success_payload["snippet_alias_json"] = result_payload

    if usage_info:
        success_payload["llm_usage"] = usage_info

    await _post_callback(callback_url, success_payload, client)
    return result_payload


async def process_table_profiling_job(
    request: TableProfilingJobRequest,
    _gateway: LLMGateway,
    client: httpx.AsyncClient,
) -> None:
    """Sequentially execute the four-step profiling pipeline and emit callbacks per action."""

    timeout_seconds = _extract_timeout_seconds(request.extra_options)
    if timeout_seconds is None:
        timeout_seconds = DEFAULT_CHAT_TIMEOUT_SECONDS

    base_payload = {
        "table_id": request.table_id,
        "version_ts": request.version_ts,
        "callback_url": str(request.callback_url),
        "table_schema": request.table_schema,
        "table_schema_version_id": request.table_schema_version_id,
        "llm_model": request.llm_model,
        "llm_timeout_seconds": timeout_seconds,
        "workspace_id": request.workspace_id,
        "rag_item_type": request.rag_item_type,
    }

    logging_request_payload = _profiling_request_for_log(request)

    try:
        artifacts: GEProfilingArtifacts = await _run_action_with_callback(
            action_type=PipelineActionType.GE_PROFILING,
            runner=lambda: _run_ge_profiling(request),
            callback_base=base_payload,
            client=client,
            callback_url=str(request.callback_url),
            input_payload=logging_request_payload,
            model_spec=request.llm_model,
        )

        table_desc_json: Dict[str, Any] = await _run_action_with_callback(
            action_type=PipelineActionType.GE_RESULT_DESC,
            runner=lambda: _execute_result_desc(
                artifacts.profiling_result,
                request,
                request.llm_model,
                client,
                timeout_seconds,
            ),
            callback_base=base_payload,
            client=client,
            callback_url=str(request.callback_url),
            input_payload=artifacts.profiling_result,
            model_spec=request.llm_model,
        )

        snippet_json: List[Dict[str, Any]] = await _run_action_with_callback(
            action_type=PipelineActionType.SNIPPET,
            runner=lambda: _execute_snippet_generation(
                table_desc_json,
                request,
                request.llm_model,
                client,
                timeout_seconds,
            ),
            callback_base=base_payload,
            client=client,
            callback_url=str(request.callback_url),
            input_payload=table_desc_json,
            model_spec=request.llm_model,
        )

        await _run_action_with_callback(
            action_type=PipelineActionType.SNIPPET_ALIAS,
            runner=lambda: _execute_snippet_alias(
                snippet_json,
                request,
                request.llm_model,
                client,
                timeout_seconds,
            ),
            callback_base=base_payload,
            client=client,
            callback_url=str(request.callback_url),
            input_payload=snippet_json,
            model_spec=request.llm_model,
        )
    except Exception:  # pragma: no cover - defensive catch
        logger.exception(
            "Table profiling pipeline failed for table_id=%s version_ts=%s",
            request.table_id,
            request.version_ts,
        )
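For orientation, here is a minimal sketch of how this coroutine could be scheduled from a FastAPI route. The route path, `app.state` attributes, and response shape are illustrative assumptions, not part of this change:

```python
# Hypothetical wiring sketch: route path and app.state attributes are assumptions.
from fastapi import BackgroundTasks, FastAPI

app = FastAPI()


@app.post("/v1/table-profiling/jobs", status_code=202)
async def submit_profiling_job(
    request: TableProfilingJobRequest, background_tasks: BackgroundTasks
) -> dict:
    # The pipeline reports progress through per-action callbacks, so the HTTP
    # response only acknowledges acceptance of the job.
    background_tasks.add_task(
        process_table_profiling_job,
        request,
        app.state.llm_gateway,  # assumed shared LLMGateway instance
        app.state.http_client,  # assumed shared httpx.AsyncClient
    )
    return {"table_id": request.table_id, "accepted": True}
```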
@@ -1,640 +0,0 @@
from __future__ import annotations

import hashlib
import json
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional, Sequence, Tuple

from sqlalchemy import text
from sqlalchemy.engine import Engine
from sqlalchemy.exc import SQLAlchemyError

from app.db import get_engine
from app.models import ActionType, TableSnippetUpsertRequest, TableSnippetUpsertResponse
from app.schemas.rag import RagItemPayload
from app.services.rag_client import RagAPIClient


logger = logging.getLogger(__name__)


def _serialize_json(value: Any) -> Tuple[str | None, int | None]:
    logger.debug("Serializing JSON payload: %s", value)
    if value is None:
        return None, None
    if isinstance(value, str):
        encoded = value.encode("utf-8")
        return value, len(encoded)
    serialized = json.dumps(value, ensure_ascii=False)
    encoded = serialized.encode("utf-8")
    return serialized, len(encoded)


def _prepare_table_schema(value: Any) -> str:
    logger.debug("Preparing table_schema payload.")
    if isinstance(value, str):
        return value
    return json.dumps(value, ensure_ascii=False)


def _prepare_model_params(params: Dict[str, Any] | None) -> str | None:
    if not params:
        return None
    serialized, _ = _serialize_json(params)
    return serialized


def _collect_common_columns(request: TableSnippetUpsertRequest) -> Dict[str, Any]:
    # Build the base column set shared by all action types; action-specific fields are populated later.
    logger.debug(
        "Collecting common columns for table_id=%s version_ts=%s action_type=%s",
        request.table_id,
        request.version_ts,
        request.action_type,
    )
    payload: Dict[str, Any] = {
        "table_id": request.table_id,
        "version_ts": request.version_ts,
        "action_type": request.action_type.value,
        "status": request.status.value,
        "callback_url": str(request.callback_url),
        "table_schema_version_id": request.table_schema_version_id,
        "table_schema": _prepare_table_schema(request.table_schema),
        "model": request.model,
        "model_provider": request.model_provider,
    }

    payload.update(
        {
            "ge_profiling_json": None,
            "ge_profiling_json_size_bytes": None,
            "ge_profiling_summary": None,
            "ge_profiling_summary_size_bytes": None,
            "ge_profiling_total_size_bytes": None,
            "ge_profiling_html_report_url": None,
            "ge_result_desc_json": None,
            "ge_result_desc_json_size_bytes": None,
            "snippet_json": None,
            "snippet_json_size_bytes": None,
            "snippet_alias_json": None,
            "snippet_alias_json_size_bytes": None,
        }
    )

    payload["model_params"] = _prepare_model_params(request.model_params)

    if request.llm_usage is not None:
        llm_usage_json, _ = _serialize_json(request.llm_usage)
        if llm_usage_json is not None:
            payload["llm_usage"] = llm_usage_json

    if request.error_code is not None:
        logger.debug("Adding error_code: %s", request.error_code)
        payload["error_code"] = request.error_code
    if request.error_message is not None:
        logger.debug("Adding error_message: %s", request.error_message)
        payload["error_message"] = request.error_message
    if request.started_at is not None:
        payload["started_at"] = request.started_at
    if request.finished_at is not None:
        payload["finished_at"] = request.finished_at
    if request.duration_ms is not None:
        payload["duration_ms"] = request.duration_ms
    if request.result_checksum is not None:
        payload["result_checksum"] = request.result_checksum

    logger.debug("Collected common payload: %s", payload)
    return payload


def _apply_action_payload(
    request: TableSnippetUpsertRequest,
    payload: Dict[str, Any],
) -> None:
    logger.debug("Applying action-specific payload for action_type=%s", request.action_type)
    if request.action_type == ActionType.GE_PROFILING:
        full_json, full_size = _serialize_json(request.ge_profiling_json)
        summary_json, summary_size = _serialize_json(request.ge_profiling_summary)
        if full_json is not None:
            payload["ge_profiling_json"] = full_json
            payload["ge_profiling_json_size_bytes"] = full_size
        if summary_json is not None:
            payload["ge_profiling_summary"] = summary_json
            payload["ge_profiling_summary_size_bytes"] = summary_size
        if request.ge_profiling_total_size_bytes is not None:
            payload["ge_profiling_total_size_bytes"] = request.ge_profiling_total_size_bytes
        elif full_size is not None or summary_size is not None:
            payload["ge_profiling_total_size_bytes"] = (full_size or 0) + (summary_size or 0)
        if request.ge_profiling_html_report_url:
            payload["ge_profiling_html_report_url"] = request.ge_profiling_html_report_url
    elif request.action_type == ActionType.GE_RESULT_DESC:
        full_json, full_size = _serialize_json(request.ge_result_desc_json)
        if full_json is not None:
            payload["ge_result_desc_json"] = full_json
            payload["ge_result_desc_json_size_bytes"] = full_size
    elif request.action_type == ActionType.SNIPPET:
        full_json, full_size = _serialize_json(request.snippet_json)
        if full_json is not None:
            payload["snippet_json"] = full_json
            payload["snippet_json_size_bytes"] = full_size
    elif request.action_type == ActionType.SNIPPET_ALIAS:
        full_json, full_size = _serialize_json(request.snippet_alias_json)
        if full_json is not None:
            payload["snippet_alias_json"] = full_json
            payload["snippet_alias_json_size_bytes"] = full_size
    else:
        logger.error("Unsupported action type encountered: %s", request.action_type)
        raise ValueError(f"Unsupported action type '{request.action_type}'.")

    logger.debug("Payload after applying action-specific data: %s", payload)


def _build_insert_statement(columns: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]:
    logger.debug("Building insert statement for columns: %s", list(columns.keys()))
    column_names = list(columns.keys())
    placeholders = [f":{name}" for name in column_names]
    update_assignments = [
        f"{name}=VALUES({name})"
        for name in column_names
        if name not in {"table_id", "version_ts", "action_type"}
    ]
    update_assignments.append("updated_at=CURRENT_TIMESTAMP")

    sql = (
        "INSERT INTO action_results ({cols}) VALUES ({vals}) "
        "ON DUPLICATE KEY UPDATE {updates}"
    ).format(
        cols=", ".join(column_names),
        vals=", ".join(placeholders),
        updates=", ".join(update_assignments),
    )
    logger.debug("Generated SQL: %s", sql)
    return sql, columns
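For reference, with a minimal payload the builder above would emit an upsert along these lines. This is an illustrative sketch, not captured output; the real statement is a single line and column order follows dict insertion order:

```python
sql, params = _build_insert_statement(
    {"table_id": 1, "version_ts": 1761752207, "action_type": "snippet", "status": "success"}
)
# sql == (
#     "INSERT INTO action_results (table_id, version_ts, action_type, status) "
#     "VALUES (:table_id, :version_ts, :action_type, :status) "
#     "ON DUPLICATE KEY UPDATE status=VALUES(status), updated_at=CURRENT_TIMESTAMP"
# )
```

Note that `VALUES()` inside `ON DUPLICATE KEY UPDATE` is deprecated from MySQL 8.0.20 in favour of row aliases, though it still works.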
def _execute_upsert(engine: Engine, sql: str, params: Dict[str, Any]) -> int:
    logger.info(
        "Executing upsert for table_id=%s version_ts=%s action_type=%s",
        params.get("table_id"),
        params.get("version_ts"),
        params.get("action_type"),
    )
    with engine.begin() as conn:
        result = conn.execute(text(sql), params)
    logger.info("Rows affected: %s", result.rowcount)
    return result.rowcount


def upsert_action_result(request: TableSnippetUpsertRequest) -> TableSnippetUpsertResponse:
    logger.info(
        "Received upsert request: table_id=%s version_ts=%s action_type=%s status=%s",
        request.table_id,
        request.version_ts,
        request.action_type,
        request.status,
    )
    logger.debug("Request payload: %s", request.model_dump())
    columns = _collect_common_columns(request)
    _apply_action_payload(request, columns)

    sql, params = _build_insert_statement(columns)
    logger.debug("Final SQL params: %s", params)

    engine = get_engine()
    try:
        rowcount = _execute_upsert(engine, sql, params)
    except SQLAlchemyError as exc:
        logger.exception(
            "Failed to upsert action result: table_id=%s version_ts=%s action_type=%s",
            request.table_id,
            request.version_ts,
            request.action_type,
        )
        raise RuntimeError(f"Database operation failed: {exc}") from exc

    # MySQL reports rowcount 2 when ON DUPLICATE KEY UPDATE modified an existing
    # row and 1 for a fresh insert, hence the > 1 check.
    updated = rowcount > 1
    return TableSnippetUpsertResponse(
        table_id=request.table_id,
        version_ts=request.version_ts,
        action_type=request.action_type,
        status=request.status,
        updated=updated,
    )
def _decode_json_field(value: Any) -> Any:
    """Decode JSON columns that may be returned as str/bytes/dicts/lists."""
    if value is None:
        return None
    if isinstance(value, (dict, list)):
        return value
    if isinstance(value, (bytes, bytearray)):
        try:
            value = value.decode("utf-8")
        except Exception:  # pragma: no cover - defensive
            return None
    if isinstance(value, str):
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            logger.warning("Failed to decode JSON field: %s", value)
            return None
    return None


def _coerce_json_array(value: Any) -> List[Any]:
    decoded = _decode_json_field(value)
    return decoded if isinstance(decoded, list) else []


def _fetch_action_payload(
    engine: Engine, table_id: int, version_ts: int, action_type: ActionType
) -> Optional[Dict[str, Any]]:
    sql = text(
        """
        SELECT id AS action_result_id, snippet_json, snippet_alias_json, updated_at, status
        FROM action_results
        WHERE table_id = :table_id
          AND version_ts = :version_ts
          AND action_type = :action_type
          AND status IN ('success', 'partial')
        ORDER BY CASE status WHEN 'success' THEN 0 ELSE 1 END, updated_at DESC
        LIMIT 1
        """
    )
    with engine.connect() as conn:
        row = conn.execute(
            sql,
            {
                "table_id": table_id,
                "version_ts": version_ts,
                "action_type": action_type.value,
            },
        ).mappings().first()
    return dict(row) if row else None


def _load_snippet_sources(
    engine: Engine, table_id: int, version_ts: int
) -> Tuple[List[Any], List[Any], Optional[datetime], Optional[int], Optional[int]]:
    alias_row = _fetch_action_payload(engine, table_id, version_ts, ActionType.SNIPPET_ALIAS)
    snippet_row = _fetch_action_payload(engine, table_id, version_ts, ActionType.SNIPPET)

    snippet_json = _coerce_json_array(alias_row.get("snippet_json") if alias_row else None)
    alias_json = _coerce_json_array(alias_row.get("snippet_alias_json") if alias_row else None)
    updated_at: Optional[datetime] = alias_row.get("updated_at") if alias_row else None
    alias_action_id: Optional[int] = alias_row.get("action_result_id") if alias_row else None
    snippet_action_id: Optional[int] = snippet_row.get("action_result_id") if snippet_row else None

    if not snippet_json and snippet_row:
        snippet_json = _coerce_json_array(snippet_row.get("snippet_json"))
        if updated_at is None:
            updated_at = snippet_row.get("updated_at")
        if alias_action_id is None:
            alias_action_id = snippet_action_id

    if not updated_at and alias_row:
        updated_at = alias_row.get("updated_at")

    return snippet_json, alias_json, updated_at, alias_action_id, snippet_action_id


def _normalize_aliases(raw_aliases: Any) -> List[Dict[str, Any]]:
    aliases: List[Dict[str, Any]] = []
    seen: set[str] = set()
    if not raw_aliases:
        return aliases
    if not isinstance(raw_aliases, list):
        return aliases
    for item in raw_aliases:
        if isinstance(item, dict):
            text_val = item.get("text")
            if not text_val or text_val in seen:
                continue
            seen.add(text_val)
            aliases.append({"text": text_val, "tone": item.get("tone")})
        elif isinstance(item, str):
            if item in seen:
                continue
            seen.add(item)
            aliases.append({"text": item})
    return aliases


def _normalize_str_list(values: Any) -> List[str]:
    if not values:
        return []
    if not isinstance(values, list):
        return []
    seen: set[str] = set()
    normalised: List[str] = []
    for val in values:
        if not isinstance(val, str):
            continue
        if val in seen:
            continue
        seen.add(val)
        normalised.append(val)
    return normalised


def _merge_alias_lists(primary: List[Dict[str, Any]], secondary: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    merged: List[Dict[str, Any]] = []
    seen: set[str] = set()
    for source in (primary, secondary):
        for item in source:
            if not isinstance(item, dict):
                continue
            text_val = item.get("text")
            if not text_val or text_val in seen:
                continue
            seen.add(text_val)
            merged.append({"text": text_val, "tone": item.get("tone")})
    return merged


def _merge_str_lists(primary: List[str], secondary: List[str]) -> List[str]:
    merged: List[str] = []
    seen: set[str] = set()
    for source in (primary, secondary):
        for item in source:
            if item in seen:
                continue
            seen.add(item)
            merged.append(item)
    return merged


def _build_alias_map(alias_payload: List[Any]) -> Dict[str, Dict[str, Any]]:
    alias_map: Dict[str, Dict[str, Any]] = {}
    for item in alias_payload:
        if not isinstance(item, dict):
            continue
        alias_id = item.get("id")
        if not alias_id:
            continue
        existing = alias_map.setdefault(
            alias_id,
            {"aliases": [], "keywords": [], "intent_tags": []},
        )
        existing["aliases"] = _merge_alias_lists(
            existing["aliases"], _normalize_aliases(item.get("aliases"))
        )
        existing["keywords"] = _merge_str_lists(
            existing["keywords"], _normalize_str_list(item.get("keywords"))
        )
        existing["intent_tags"] = _merge_str_lists(
            existing["intent_tags"], _normalize_str_list(item.get("intent_tags"))
        )
    return alias_map


def merge_snippet_records_from_db(
    table_id: int,
    version_ts: int,
    *,
    engine: Optional[Engine] = None,
) -> List[Dict[str, Any]]:
    """
    Load snippet + snippet_alias JSON from action_results after snippet_alias is stored,
    then merge into a unified snippet object list ready for downstream RAG.
    """
    engine = engine or get_engine()
    snippets, aliases, updated_at, alias_action_id, snippet_action_id = _load_snippet_sources(
        engine, table_id, version_ts
    )
    alias_map = _build_alias_map(aliases)

    merged: List[Dict[str, Any]] = []
    seen_ids: set[str] = set()

    for snippet in snippets:
        if not isinstance(snippet, dict):
            continue
        snippet_id = snippet.get("id")
        if not snippet_id:
            continue
        alias_info = alias_map.get(snippet_id)
        record = dict(snippet)
        record_aliases = _normalize_aliases(record.get("aliases"))
        record_keywords = _normalize_str_list(record.get("keywords"))
        record_intents = _normalize_str_list(record.get("intent_tags"))

        if alias_info:
            record_aliases = _merge_alias_lists(record_aliases, alias_info["aliases"])
            record_keywords = _merge_str_lists(record_keywords, alias_info["keywords"])
            record_intents = _merge_str_lists(record_intents, alias_info["intent_tags"])

        record["aliases"] = record_aliases
        record["keywords"] = record_keywords
        record["intent_tags"] = record_intents
        record["table_id"] = table_id
        record["version_ts"] = version_ts
        record["updated_at_from_action"] = updated_at
        record["source"] = "snippet"
        record["action_result_id"] = alias_action_id or snippet_action_id
        merged.append(record)
        seen_ids.add(snippet_id)

    for alias_id, alias_info in alias_map.items():
        if alias_id in seen_ids:
            continue
        if alias_action_id is None and snippet_action_id is None:
            continue
        merged.append(
            {
                "id": alias_id,
                "aliases": alias_info["aliases"],
                "keywords": alias_info["keywords"],
                "intent_tags": alias_info["intent_tags"],
                "table_id": table_id,
                "version_ts": version_ts,
                "updated_at_from_action": updated_at,
                "source": "alias_only",
                "action_result_id": alias_action_id or snippet_action_id,
            }
        )

    return merged


def _stable_rag_item_id(table_id: int, version_ts: int, snippet_id: str) -> int:
    digest = hashlib.md5(f"{table_id}:{version_ts}:{snippet_id}".encode("utf-8")).hexdigest()
    return int(digest[:16], 16) % 9_000_000_000_000_000_000
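A quick sanity check of the derivation: the first 16 hex characters of the MD5 digest form a 64-bit integer, and the final modulus keeps the ID inside a signed-64-bit-friendly range. A minimal demonstration (the sample IDs are placeholders):

```python
import hashlib


def demo_stable_id(table_id: int, version_ts: int, snippet_id: str) -> int:
    # Same recipe as _stable_rag_item_id: md5 -> first 16 hex chars -> int -> bounded.
    digest = hashlib.md5(f"{table_id}:{version_ts}:{snippet_id}".encode("utf-8")).hexdigest()
    return int(digest[:16], 16) % 9_000_000_000_000_000_000


# The same (table_id, version_ts, snippet_id) triple always maps to the same ID,
# so re-running ingestion overwrites rather than duplicates RAG items.
assert demo_stable_id(1, 1761752207, "snpt_topn_station") == demo_stable_id(
    1, 1761752207, "snpt_topn_station"
)
```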
def _to_serializable(value: Any) -> Any:
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, datetime):
        return value.isoformat()
    if isinstance(value, dict):
        return {k: _to_serializable(v) for k, v in value.items()}
    if isinstance(value, list):
        return [_to_serializable(v) for v in value]
    return str(value)


def _build_rag_text(snippet: Dict[str, Any]) -> str:
    # Deterministic text concatenation for embedding input.
    parts: List[str] = []

    def _add(label: str, value: Any) -> None:
        if value is None:
            return
        if isinstance(value, list):
            value = ", ".join([str(v) for v in value if v])
        elif isinstance(value, dict):
            value = json.dumps(value, ensure_ascii=False)
        if value:
            parts.append(f"{label}: {value}")

    _add("Title", snippet.get("title") or snippet.get("id"))
    _add("Description", snippet.get("desc"))
    _add("Business", snippet.get("business_caliber"))
    _add("Type", snippet.get("type"))
    _add("Examples", snippet.get("examples") or [])
    _add("Aliases", [a.get("text") for a in snippet.get("aliases") or [] if isinstance(a, dict)])
    _add("Keywords", snippet.get("keywords") or [])
    _add("IntentTags", snippet.get("intent_tags") or [])
    _add("Applicability", snippet.get("applicability"))
    _add("DialectSQL", snippet.get("dialect_sql"))
    return "\n".join(parts)
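Applied to a snippet like `snpt_topn_station` from the deleted fixtures further down, the concatenation yields one labelled line per populated field, roughly as follows (illustrative sketch, not captured output; empty fields are skipped):

```python
text = _build_rag_text(
    {
        "id": "snpt_topn_station",
        "title": "站点TopN水表数",
        "desc": "按站点统计水表数量并取前N",
        "type": "topn",
        "keywords": ["TopN", "排名", "station"],
    }
)
# text ==
# Title: 站点TopN水表数
# Description: 按站点统计水表数量并取前N
# Type: topn
# Keywords: TopN, 排名, station
```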
def _prepare_rag_payloads(
    snippets: List[Dict[str, Any]],
    table_id: int,
    version_ts: int,
    workspace_id: int,
    rag_item_type: str = "SNIPPET",
) -> Tuple[List[Dict[str, Any]], List[RagItemPayload]]:
    rows: List[Dict[str, Any]] = []
    payloads: List[RagItemPayload] = []
    now = datetime.utcnow()

    for snippet in snippets:
        snippet_id = snippet.get("id")
        if not snippet_id:
            continue
        action_result_id = snippet.get("action_result_id")
        if action_result_id is None:
            logger.warning(
                "Skipping snippet without action_result_id for RAG ingestion (table_id=%s version_ts=%s snippet_id=%s)",
                table_id,
                version_ts,
                snippet_id,
            )
            continue
        rag_item_id = _stable_rag_item_id(table_id, version_ts, snippet_id)
        rag_text = _build_rag_text(snippet)
        serializable_snippet = _to_serializable(snippet)
        merged_json = json.dumps(serializable_snippet, ensure_ascii=False)
        updated_at_raw = snippet.get("updated_at_from_action") or now
        if isinstance(updated_at_raw, str):
            try:
                updated_at = datetime.fromisoformat(updated_at_raw)
            except ValueError:
                updated_at = now
        else:
            updated_at = updated_at_raw if isinstance(updated_at_raw, datetime) else now

        created_at = updated_at

        row = {
            "rag_item_id": rag_item_id,
            "workspace_id": workspace_id,
            "table_id": table_id,
            "version_ts": version_ts,
            "created_at": created_at,
            "action_result_id": action_result_id,
            "snippet_id": snippet_id,
            "rag_text": rag_text,
            "merged_json": merged_json,
            "updated_at": updated_at,
        }
        rows.append(row)

        payloads.append(
            RagItemPayload(
                id=rag_item_id,
                workspaceId=workspace_id,
                name=snippet.get("title") or snippet_id,
                embeddingData=rag_text,
                type=rag_item_type or "SNIPPET",
            )
        )

    return rows, payloads


def _upsert_rag_snippet_rows(engine: Engine, rows: Sequence[Dict[str, Any]]) -> None:
    if not rows:
        return
    delete_sql = text("DELETE FROM rag_snippet WHERE rag_item_id=:rag_item_id")
    insert_sql = text(
        """
        INSERT INTO rag_snippet (
            rag_item_id,
            workspace_id,
            table_id,
            version_ts,
            created_at,
            action_result_id,
            snippet_id,
            rag_text,
            merged_json,
            updated_at
        ) VALUES (
            :rag_item_id,
            :workspace_id,
            :table_id,
            :version_ts,
            :created_at,
            :action_result_id,
            :snippet_id,
            :rag_text,
            :merged_json,
            :updated_at
        )
        """
    )
    with engine.begin() as conn:
        for row in rows:
            conn.execute(delete_sql, row)
            conn.execute(insert_sql, row)


async def ingest_snippet_rag_from_db(
    table_id: int,
    version_ts: int,
    *,
    workspace_id: int,
    rag_item_type: str = "SNIPPET",
    client,
    engine: Optional[Engine] = None,
    rag_client: Optional[RagAPIClient] = None,
) -> List[int]:
    """
    Merge snippet + alias JSON from action_results, persist to rag_snippet, then push to RAG via addBatch.
    Returns list of rag_item_id ingested.
    """
    engine = engine or get_engine()
    snippets = merge_snippet_records_from_db(table_id, version_ts, engine=engine)
    if not snippets:
        logger.info(
            "No snippets available for RAG ingestion (table_id=%s version_ts=%s)",
            table_id,
            version_ts,
        )
        return []

    rows, payloads = _prepare_rag_payloads(
        snippets,
        table_id=table_id,
        version_ts=version_ts,
        workspace_id=workspace_id,
        rag_item_type=rag_item_type,
    )

    _upsert_rag_snippet_rows(engine, rows)

    rag_client = rag_client or RagAPIClient()
    await rag_client.add_batch(client, payloads)
    return [row["rag_item_id"] for row in rows]
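Taken together, a one-off ingestion run could look like the minimal sketch below; the table/workspace IDs and timeout are placeholders:

```python
import asyncio

import httpx


async def main() -> None:
    async with httpx.AsyncClient(timeout=30) as client:
        # Reads merged snippet/alias rows, persists rag_snippet, then pushes addBatch.
        ids = await ingest_snippet_rag_from_db(
            table_id=1,            # placeholder
            version_ts=1761752207,  # placeholder
            workspace_id=42,        # placeholder
            client=client,
        )
        print(f"Ingested {len(ids)} RAG items")


asyncio.run(main())
```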
@@ -20,11 +20,7 @@ PROVIDER_KEY_ENV_MAP: Dict[str, str] = {
}


DEFAULT_IMPORT_MODEL = os.getenv("DEFAULT_IMPORT_MODEL", "deepseek:deepseek-chat")
NEW_API_BASE_URL = os.getenv("NEW_API_BASE_URL")
NEW_API_AUTH_TOKEN = os.getenv("NEW_API_AUTH_TOKEN")
RAG_API_BASE_URL = os.getenv("RAG_API_BASE_URL", "https://tchatbi.agentcarrier.cn/chatbi/api")
RAG_API_AUTH_TOKEN = os.getenv("RAG_API_AUTH_TOKEN")
DEFAULT_IMPORT_MODEL = os.getenv("DEFAULT_IMPORT_MODEL", "openai:gpt-4.1-mini")


@lru_cache(maxsize=1)
@@ -1,116 +0,0 @@
from __future__ import annotations

from typing import Any, Dict, Iterable, Optional


PROMPT_TOKEN_KEYS: tuple[str, ...] = ("prompt_tokens", "input_tokens", "promptTokenCount")
COMPLETION_TOKEN_KEYS: tuple[str, ...] = (
    "completion_tokens",
    "output_tokens",
    "candidatesTokenCount",
)
TOTAL_TOKEN_KEYS: tuple[str, ...] = ("total_tokens", "totalTokenCount")
USAGE_CONTAINER_KEYS: tuple[str, ...] = ("usage", "usageMetadata", "usage_metadata")


def _normalize_usage_value(value: Any) -> Any:
    if isinstance(value, (int, float)):
        return int(value)

    if isinstance(value, str):
        stripped = value.strip()
        if not stripped:
            return None
        try:
            numeric = float(stripped)
        except ValueError:
            return None
        return int(numeric)

    if isinstance(value, dict):
        normalized: Dict[str, Any] = {}
        for key, nested_value in value.items():
            normalized_value = _normalize_usage_value(nested_value)
            if normalized_value is not None:
                normalized[key] = normalized_value
        return normalized or None

    if isinstance(value, (list, tuple, set)):
        normalized_list = [
            item for item in (_normalize_usage_value(element) for element in value) if item is not None
        ]
        return normalized_list or None

    return None


def _first_numeric(payload: Dict[str, Any], keys: Iterable[str]) -> Optional[int]:
    for key in keys:
        value = payload.get(key)
        if isinstance(value, (int, float)):
            return int(value)
    return None


def _canonicalize_counts(payload: Dict[str, Any]) -> None:
    prompt = _first_numeric(payload, PROMPT_TOKEN_KEYS)
    completion = _first_numeric(payload, COMPLETION_TOKEN_KEYS)
    total = _first_numeric(payload, TOTAL_TOKEN_KEYS)

    if prompt is not None:
        payload["prompt_tokens"] = prompt
    else:
        payload.pop("prompt_tokens", None)

    if completion is not None:
        payload["completion_tokens"] = completion
    else:
        payload.pop("completion_tokens", None)

    if total is not None:
        payload["total_tokens"] = total
    elif prompt is not None and completion is not None:
        payload["total_tokens"] = prompt + completion
    else:
        payload.pop("total_tokens", None)

    for alias in PROMPT_TOKEN_KEYS[1:]:
        payload.pop(alias, None)
    for alias in COMPLETION_TOKEN_KEYS[1:]:
        payload.pop(alias, None)
    for alias in TOTAL_TOKEN_KEYS[1:]:
        payload.pop(alias, None)


def _extract_usage_container(candidate: Any) -> Optional[Dict[str, Any]]:
    if not isinstance(candidate, dict):
        return None
    for key in USAGE_CONTAINER_KEYS:
        value = candidate.get(key)
        if isinstance(value, dict):
            return value
    return None


def extract_usage(payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Unified helper to parse token usage metadata from diverse provider responses."""
    if not isinstance(payload, dict):
        return None

    usage_candidate = _extract_usage_container(payload)
    if usage_candidate is None:
        raw_section = payload.get("raw")
        usage_candidate = _extract_usage_container(raw_section)

    if usage_candidate is None:
        return None

    normalized = _normalize_usage_value(usage_candidate)
    if not isinstance(normalized, dict):
        return None

    _canonicalize_counts(normalized)
    return normalized or None


__all__ = ["extract_usage"]
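For example, fed the `deepseek-result.json` response shown below, the helper finds no top-level usage container, falls back to the `usage` block under `raw`, and canonicalizes the counts. A sketch of the expected behaviour:

```python
usage = extract_usage(
    {
        "provider": "deepseek",
        "raw": {
            "usage": {
                "prompt_tokens": 1078,
                "completion_tokens": 256,
                "total_tokens": 1334,
                "prompt_cache_hit_tokens": 1024,
            }
        },
    }
)
# -> {"prompt_tokens": 1078, "completion_tokens": 256, "total_tokens": 1334,
#     "prompt_cache_hit_tokens": 1024}
# Provider-specific extras that are not alias keys pass through untouched.
```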
41 deepseek-result.json Normal file
@@ -0,0 +1,41 @@
{
  "provider": "deepseek",
  "model": "deepseek-chat",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "```json\n{\n \"table_name\": \"national_brand_sales\",\n \"description\": \"全国品牌系统外销售数据\",\n \"columns\": [\n {\n \"original_name\": \"品牌\",\n \"standard_name\": \"brand\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(50)\",\n \"java_type\": \"string\",\n \"nullable\": true,\n \"distinct_count_sample\": 5,\n \"null_ratio_sample\": 0.4,\n \"is_enum_candidate\": false,\n \"description\": \"品牌名称\",\n \"date_format\": null\n },\n {\n \"original_name\": \"产品价类\",\n \"standard_name\": \"price_category\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(10)\",\n \"java_type\": \"string\",\n \"nullable\": false,\n \"distinct_count_sample\": 3,\n \"null_ratio_sample\": 0.0,\n \"is_enum_candidate\": true,\n \"description\": \"产品价格分类(一类/二类/三类)\",\n \"date_format\": null\n },\n {\n \"original_name\": \"是否重点品牌
      }
    }
  ],
  "raw": {
    "id": "67f3cc80-38bc-4bb7-b336-48d4886722c4",
    "object": "chat.completion",
    "created": 1761752207,
    "model": "deepseek-chat",
    "choices": [
      {
        "index": 0,
        "message": {
          "role": "assistant",
          "content": "```json\n{\n \"table_name\": \"national_brand_sales\",\n \"description\": \"全国品牌系统外销售数据\",\n \"columns\": [\n {\n \"original_name\": \"品牌\",\n \"standard_name\": \"brand\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(50)\",\n \"java_type\": \"string\",\n \"nullable\": true,\n \"distinct_count_sample\": 5,\n \"null_ratio_sample\": 0.4,\n \"is_enum_candidate\": false,\n \"description\": \"品牌名称\",\n \"date_format\": null\n },\n {\n \"original_name\": \"产品价类\",\n \"standard_name\": \"price_category\",\n \"data_type\": \"string\",\n \"db_type\": \"varchar(10)\",\n \"java_type\": \"string\",\n \"nullable\": false,\n \"distinct_count_sample\": 3,\n \"null_ratio_sample\": 0.0,\n \"is_enum_candidate\": true,\n \"description\": \"产品价格分类(一类/二类/三类)\",\n \"date_format\": null\n },\n {\n \"original_name\": \"是否重点品牌
        },
        "logprobs": null,
        "finish_reason": "length"
      }
    ],
    "usage": {
      "prompt_tokens": 1078,
      "completion_tokens": 256,
      "total_tokens": 1334,
      "prompt_tokens_details": {
        "cached_tokens": 1024
      },
      "prompt_cache_hit_tokens": 1024,
      "prompt_cache_miss_tokens": 54
    },
    "system_fingerprint": "fp_ffc7281d48_prod0820_fp8_kvcache"
  }
}
@@ -1 +0,0 @@
{"role": "dimension", "time": {"range": null, "column": null, "has_gaps": null, "granularity": "unknown"}, "grain": ["service_point_id"], "table": "data-ge.water_meter_info", "columns": [{"name": "meter_subtype", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 9, "distinct_ratio": 0.03, "pk_candidate_score": 0.03, "metric_candidate_score": 0.0}, {"name": "installation_position", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 4, "distinct_ratio": 0.013333333333333334, "pk_candidate_score": 0.013333333333333334, "metric_candidate_score": 0.0}, {"name": "supply_office", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 11, "distinct_ratio": 0.03666666666666667, "pk_candidate_score": 0.03666666666666667, "metric_candidate_score": 0.0}, {"name": "meter_diameter", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 8, "distinct_ratio": 0.02666666666666667, "pk_candidate_score": 0.02666666666666667, "metric_candidate_score": 0.0}, {"name": "account_id", "dtype": "unknown", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "该列的统计指标(如空值率、唯一性)缺失,但根据命名规则推断为ID。", "enumish": null, "null_rate": null, "top_values": [], "semantic_type": "id", "distinct_count": null, "distinct_ratio": null, "pk_candidate_score": 0.9, "metric_candidate_score": 0.0}, {"name": "service_point_id", "dtype": "unknown", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "该列的统计指标(如空值率、唯一性)缺失,但根据命名规则推断为ID。", "enumish": null, "null_rate": null, "top_values": [], "semantic_type": "id", "distinct_count": null, "distinct_ratio": null, "pk_candidate_score": 0.95, "metric_candidate_score": 0.0}, {"name": "station", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 36, "distinct_ratio": 0.12, "pk_candidate_score": 0.12, "metric_candidate_score": 0.0}, {"name": "meter_type", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 5, "distinct_ratio": 0.016666666666666666, "pk_candidate_score": 0.016666666666666666, "metric_candidate_score": 0.0}, {"name": "district", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 13, "distinct_ratio": 0.043333333333333335, "pk_candidate_score": 0.043333333333333335, "metric_candidate_score": 0.0}, {"name": "meter_status", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "该列只有一个唯一值 '有效'。", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 1, "distinct_ratio": 0.0033333333333333335, "pk_candidate_score": 0.0033333333333333335, "metric_candidate_score": 0.0}], "quality": {"warning_hints": ["列 'meter_status' 只有一个唯一值 '有效',可能为常量列。"], "failed_expectations": []}, "row_count": 300, "fk_candidates": [], "confidence_notes": ["表角色(role)被推断为 'dimension',因为其列几乎完全由ID和类别属性构成,且缺少数值指标或时间序列列。", "主键候选(primary_key_candidates) 'service_point_id' 和 'account_id' 是基于命名约定(包含'_id')推断的。其唯一性和非空性未在GE结果中直接度量,因此这是一个高置信度的猜测。", "表粒度(grain)可能为 'service_point',与推断的主键 'service_point_id' 相对应。", "未根据列名或数据格式识别出时间列。"], "primary_key_candidates": [["service_point_id"], ["account_id"]]}
@@ -1,180 +0,0 @@
[
  {
    "id": "snpt_count-service-points-by-dimension",
    "aliases": [
      {"text": "各个区有多少水表", "tone": "口语"},
      {"text": "按维度统计用水点数", "tone": "中性"},
      {"text": "各维度用水点数量分布", "tone": "专业"}
    ],
    "keywords": ["用水点数", "service_point_count", "数量", "统计", "汇总", "aggregate", "维度", "dimension", "区域", "district", "供水所", "分组统计", "水表"],
    "intent_tags": ["aggregate", "by_dimension"]
  },
  {
    "id": "snpt_topn-service-points-by-dimension",
    "aliases": [
      {"text": "哪个地方水表最多", "tone": "口语"},
      {"text": "用水点数Top-N排名", "tone": "中性"},
      {"text": "Top-N用水点数维度排行", "tone": "专业"}
    ],
    "keywords": ["Top-N", "top", "排名", "排行", "ranking", "最多", "用水点数", "service_point_count", "维度", "dimension", "站点", "station", "水表"],
    "intent_tags": ["topn", "by_dimension"]
  },
  {
    "id": "snpt_ratio-service-points-by-dimension",
    "aliases": [
      {"text": "各种水表各占多少", "tone": "口语"},
      {"text": "各维度用水点数占比", "tone": "中性"},
      {"text": "用水点维度构成分析", "tone": "专业"}
    ],
    "keywords": ["占比", "percentage", "百分比", "ratio", "构成", "分布", "用水点数", "水表类型", "meter_type", "维度", "dimension", "水表"],
    "intent_tags": ["ratio", "by_dimension"]
  },
  {
    "id": "snpt_quality-check-duplicate-spid",
    "aliases": [
      {"text": "有没有重复的水表号", "tone": "口语"},
      {"text": "检查重复的用水点ID", "tone": "中性"},
      {"text": "用水点ID唯一性校验", "tone": "专业"}
    ],
    "keywords": ["数据质量", "quality", "检查", "校验", "重复", "duplicate", "唯一性", "uniqueness", "用水点ID", "service_point_id", "异常检测", "主键"],
    "intent_tags": ["quality", "by_dimension"]
  },
  {
    "id": "snpt_sample-filter-service-points-by-dims",
    "aliases": [
      {"text": "给我看城区的机械表", "tone": "口语"},
      {"text": "按多维度筛选用水点", "tone": "中性"},
      {"text": "多维组合条件过滤用水点", "tone": "专业"}
    ],
    "keywords": ["筛选", "过滤", "filter", "查询", "明细", "列表", "sample", "用水点", "区域", "district", "水表类型", "meter_type", "条件查询"],
    "intent_tags": ["sample", "filter"]
  }
]
@@ -1,186 +0,0 @@
[
  {
    "id": "snpt_count-service-points-by-dimension",
    "desc": "按指定维度(如区域、供水所)分组,统计各分类下的用水点数量。",
    "type": "aggregate",
    "title": "按维度统计用水点数",
    "examples": ["按区域统计用水点数量", "各个供水所分别有多少个用水点"],
    "variables": [{"name": "dimension_column", "type": "column", "default": "district"}],
    "dialect_sql": {
      "mysql": "SELECT\n `${dimension_column}`,\n COUNT(DISTINCT service_point_id) AS service_point_count\nFROM\n `data-ge.water_meter_info`\nGROUP BY\n `${dimension_column}`\nORDER BY\n service_point_count DESC;"
    },
    "applicability": {
      "constraints": {
        "notes": [
          "适用于对水表档案信息进行分类汇总统计。",
          "可将变量 ${dimension_column} 替换为任一维度列,如 district, supply_office, station, meter_type 等。"
        ],
        "fk_join_available": false,
        "dim_cardinality_hint": null
      },
      "time_column": null,
      "required_columns": ["service_point_id"]
    },
    "business_caliber": "用水点数:对 `service_point_id` 进行去重计数,代表一个独立的服务点(通常对应一个水表)。统计粒度为“指定维度”。"
  },
  {
    "id": "snpt_topn-service-points-by-dimension",
    "desc": "按指定维度(如区域、站点)统计用水点数,并展示数量最多的前N个分类。",
    "type": "topn",
    "title": "Top-N 用水点数维度排名",
    "examples": ["哪个区域的用水点最多", "用水点数排名前5的站点是哪些"],
    "variables": [
      {"name": "dimension_column", "type": "column", "default": "station"},
      {"name": "top_n", "type": "int", "default": 10}
    ],
    "dialect_sql": {
      "mysql": "SELECT\n `${dimension_column}`,\n COUNT(DISTINCT service_point_id) AS service_point_count\nFROM\n `data-ge.water_meter_info`\nGROUP BY\n `${dimension_column}`\nORDER BY\n service_point_count DESC\nLIMIT ${top_n};"
    },
    "applicability": {
      "constraints": {
        "notes": ["维度 `station` 基数较高 (36),建议 Top-N 查询时结合业务场景合理设置 N 值。"],
        "fk_join_available": false,
        "dim_cardinality_hint": 36
      },
      "time_column": null,
      "required_columns": ["service_point_id"]
    },
    "business_caliber": "用水点数:对 `service_point_id` 进行去重计数。排名依据为各维度分类下的用水点总数。统计粒度为“指定维度”。"
  },
  {
    "id": "snpt_ratio-service-points-by-dimension",
    "desc": "计算在指定维度下,各分类的用水点数占总用水点数的百分比,以分析其分布构成。",
    "type": "ratio",
    "title": "各维度用水点数占比",
    "examples": ["不同水表类型(meter_type)的分布情况", "各个区域的用水点占比是多少"],
    "variables": [{"name": "dimension_column", "type": "column", "default": "meter_type"}],
    "dialect_sql": {
      "mysql": "SELECT\n `${dimension_column}`,\n COUNT(DISTINCT service_point_id) AS service_point_count,\n COUNT(DISTINCT service_point_id) * 100.0 / SUM(COUNT(DISTINCT service_point_id)) OVER () AS percentage\nFROM\n `data-ge.water_meter_info`\nGROUP BY\n `${dimension_column}`\nORDER BY\n service_point_count DESC;"
    },
    "applicability": {
      "constraints": {
        "notes": ["SQL模板使用了窗口函数 SUM() OVER(),请确保MySQL版本支持(8.0+)。"],
        "fk_join_available": false,
        "dim_cardinality_hint": null
      },
      "time_column": null,
      "required_columns": ["service_point_id"]
    },
    "business_caliber": "用水点数占比:某分类下的用水点数 / 总用水点数。用水点数以 `service_point_id` 去重计数。统计粒度为“指定维度”。"
  },
  {
    "id": "snpt_quality-check-duplicate-spid",
    "desc": "查找在用水点信息表中存在重复的 `service_point_id`,用于数据质量校验。",
    "type": "quality",
    "title": "检查重复的用水点ID",
    "examples": ["检查是否存在重复的水表档案", "校验用水点ID的唯一性"],
    "variables": [],
    "dialect_sql": {
      "mysql": "SELECT\n service_point_id,\n COUNT(*) AS occurrences\nFROM\n `data-ge.water_meter_info`\nGROUP BY\n service_point_id\nHAVING\n COUNT(*) > 1;"
    },
    "applicability": {
      "constraints": {
        "notes": ["预期返回结果为空。若有返回,则表示数据存在一致性问题,`service_point_id` 未能作为唯一主键。"],
        "fk_join_available": false,
        "dim_cardinality_hint": null
      },
      "time_column": null,
      "required_columns": ["service_point_id"]
    },
    "business_caliber": "重复项:指 `service_point_id` 出现次数大于1的记录。此ID应为表的主键,理论上不应重复。"
  },
  {
    "id": "snpt_sample-filter-service-points-by-dims",
    "desc": "根据区域、水表类型、供水所等多个维度组合条件,筛选出符合条件的用水点明细。",
    "type": "sample",
    "title": "多维度筛选用水点列表",
    "examples": ["查询城区的机械表有哪些", "拉取某个供水所下特定口径水表的列表"],
    "variables": [
      {"name": "district_name", "type": "string", "default": "城区"},
      {"name": "meter_type_name", "type": "string", "default": "机械表"},
      {"name": "limit_num", "type": "int", "default": 100}
    ],
    "dialect_sql": {
      "mysql": "SELECT\n service_point_id,\n account_id,\n district,\n supply_office,\n meter_type,\n meter_subtype,\n meter_diameter\nFROM\n `data-ge.water_meter_info`\nWHERE\n district = '${district_name}'\n AND meter_type = '${meter_type_name}'\n -- AND meter_status = '有效' -- 可选:根据画像,该列为常量'有效',可不加\nLIMIT ${limit_num};"
    },
    "applicability": {
      "constraints": {
        "notes": [],
        "fk_join_available": false,
        "dim_cardinality_hint": null
      },
      "time_column": null,
      "required_columns": ["service_point_id", "account_id", "district", "supply_office", "meter_type", "meter_subtype", "meter_diameter"]
    },
    "business_caliber": "返回满足所有筛选条件的用水点明细信息。`meter_status` 列只有一个值 '有效',通常无需作为筛选条件。"
  }
]
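Each snippet couples a `variables` list with `${name}` placeholders inside `dialect_sql.mysql`. A minimal renderer under that convention might look like the sketch below; Python's `string.Template` uses the same `${name}` syntax, while the later fixture that writes `{{top_n}}` would need a different substitution step:

```python
# Hypothetical renderer sketch; not part of this diff.
from string import Template
from typing import Any, Dict


def render_snippet_sql(snippet: Dict[str, Any], overrides: Dict[str, Any] | None = None) -> str:
    # Start from each variable's declared default, then apply caller overrides.
    values = {v["name"]: v["default"] for v in snippet.get("variables", [])}
    values.update(overrides or {})
    return Template(snippet["dialect_sql"]["mysql"]).substitute(
        {k: str(v) for k, v in values.items()}
    )
```

Note this is string interpolation, not parameter binding: `dimension_column` names a column and cannot be bound as a SQL parameter, but string-typed values such as `district_name` should be validated or escaped before substitution to avoid injection.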
@@ -1,230 +0,0 @@
{
  "role": "dimension",
  "time": {"range": null, "column": null, "has_gaps": null, "granularity": "unknown"},
  "grain": ["service_point_id"],
  "table": "data-ge.water_meter_info",
  "columns": [
    {"name": "supply_office", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;11 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 11, "distinct_ratio": 0.03666666666666667, "pk_candidate_score": 0.05, "metric_candidate_score": 0.0},
    {"name": "station", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;36 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 36, "distinct_ratio": 0.12, "pk_candidate_score": 0.1, "metric_candidate_score": 0.0},
    {"name": "district", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;13 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 13, "distinct_ratio": 0.043333333333333335, "pk_candidate_score": 0.05, "metric_candidate_score": 0.0},
    {"name": "meter_diameter", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;8 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 8, "distinct_ratio": 0.02666666666666667, "pk_candidate_score": 0.03, "metric_candidate_score": 0.0},
    {"name": "meter_status", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;单一取值(\"有效\")", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 1, "distinct_ratio": 0.0033333333333333335, "pk_candidate_score": 0.0, "metric_candidate_score": 0.0},
    {"name": "meter_subtype", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;9 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 9, "distinct_ratio": 0.03, "pk_candidate_score": 0.03, "metric_candidate_score": 0.0},
    {"name": "meter_type", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;5 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 5, "distinct_ratio": 0.016666666666666666, "pk_candidate_score": 0.02, "metric_candidate_score": 0.0},
    {"name": "installation_position", "dtype": "string", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "非空;4 个枚举值(GE 约束)", "enumish": true, "null_rate": 0.0, "top_values": [], "semantic_type": "dimension", "distinct_count": 4, "distinct_ratio": 0.013333333333333334, "pk_candidate_score": 0.02, "metric_candidate_score": 0.0},
    {"name": "service_point_id", "dtype": "unknown", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "命名指示标识列;未提供唯一性或非空验证", "enumish": null, "null_rate": null, "top_values": [], "semantic_type": "id", "distinct_count": null, "distinct_ratio": null, "pk_candidate_score": 0.6, "metric_candidate_score": 0.05},
    {"name": "account_id", "dtype": "unknown", "stats": {"max": null, "min": null, "std": null, "mean": null, "skewness": null}, "comment": "命名指示账户标识;未提供唯一性或非空验证", "enumish": null, "null_rate": null, "top_values": [], "semantic_type": "id", "distinct_count": null, "distinct_ratio": null, "pk_candidate_score": 0.5, "metric_candidate_score": 0.05}
  ],
  "quality": {
    "warning_hints": [
      "以下列未设置非空校验:service_point_id, account_id(空值情况未知)",
      "未识别到时间列"
    ],
    "failed_expectations": []
  },
  "row_count": 300,
  "fk_candidates": [],
  "confidence_notes": [
    "role 判定为 dimension:表内列均为枚举/分类或ID,未发现数值型度量或时间列;34/34 期望均为分类枚举/非空与去重比例。",
    "grain 猜测为 service_point_id:仅依据命名启发式,缺少唯一性与非空度量佐证(置信度较低)。",
    "未识别时间列:列名与期望均未涉及日期/时间,也无最小/最大时间范围可推断。"
  ],
  "primary_key_candidates": []
}
@@ -1,372 +0,0 @@
[
  {
    "id": "snpt_topn_station",
    "aliases": [
      {"text": "站点水表排行前N", "tone": "中性"},
      {"text": "哪个站点表最多", "tone": "口语"},
      {"text": "按站点水表TopN", "tone": "专业"}
    ],
    "keywords": ["TopN", "排名", "排行", "station", "站点", "水表数", "meter count", "distinct", "去重", "聚合", "排序", "榜单"],
    "intent_tags": ["topn", "aggregate", "by_dimension"]
  },
  {
    "id": "snpt_share_district",
    "aliases": [
      {"text": "各辖区水表占比", "tone": "中性"},
      {"text": "哪个辖区占比高", "tone": "口语"},
      {"text": "按辖区水表比例", "tone": "专业"}
    ],
    "keywords": ["占比", "ratio", "district", "辖区", "水表数", "meter count", "distinct", "去重", "百分比", "份额", "聚合", "排序", "分布"],
    "intent_tags": ["ratio", "aggregate", "by_dimension"]
  },
  {
    "id": "snpt_dist_diameter",
    "aliases": [
      {"text": "表径水表数分布", "tone": "中性"},
      {"text": "不同口径有多少", "tone": "口语"},
      {"text": "按表径去重计数", "tone": "专业"}
    ],
    "keywords": ["分布", "distribution", "meter_diameter", "表径", "水表数", "meter count", "distinct", "去重", "聚合", "类别", "category", "条形图", "饼图", "排行"],
    "intent_tags": ["aggregate", "by_dimension"]
  },
  {
    "id": "snpt_type_subtype_matrix",
    "aliases": [
      {"text": "类型×子类水表数", "tone": "中性"},
      {"text": "看各类型各子类", "tone": "口语"},
      {"text": "类型子类组合统计", "tone": "专业"}
    ],
    "keywords": ["类型", "type", "子类", "subtype", "组合", "matrix", "交叉分析", "cross-tab", "水表数", "meter count", "distinct", "去重", "分布", "聚合", "维度"],
    "intent_tags": ["aggregate", "by_dimension"]
  },
  {
    "id": "snpt_quality_spid_uniq",
    "aliases": [
      {"text": "服务点ID唯一性检", "tone": "专业"},
      {"text": "服务点ID有重复吗", "tone": "口语"},
      {"text": "服务点ID完整性评估", "tone": "中性"}
    ],
    "keywords": ["质量检查", "quality", "唯一性", "uniqueness", "重复", "duplicate", "空值", "NULL", "完整性", "integrity", "service_point_id", "数据质量", "统计", "去重", "异常检测"],
    "intent_tags": ["quality"]
  },
  {
    "id": "snpt_quality_account_nulls",
    "aliases": [
      {"text": "账户ID缺失明细", "tone": "中性"},
      {"text": "看看哪些账户为空", "tone": "口语"},
      {"text": "account_id空值样本", "tone": "专业"}
    ],
    "keywords": ["质量检查", "缺失", "missing", "空值", "NULL", "account_id", "样本", "sample", "抽样", "sampling", "明细", "排查", "过滤", "WHERE", "LIMIT"],
    "intent_tags": ["quality", "sample"]
  },
  {
    "id": "snpt_sample_random_rows",
    "aliases": [
      {"text": "随机抽样水表明细", "tone": "中性"},
      {"text": "随机取几条看看", "tone": "口语"},
      {"text": "RAND()样本抽取", "tone": "专业"}
    ],
    "keywords": ["随机", "random", "样本", "sample", "抽样", "sampling", "明细", "details", "质检", "QA", "RAND()", "LIMIT", "抽取", "数据验证"],
    "intent_tags": ["sample"]
  },
  {
    "id": "snpt_filter_office_type_where",
    "aliases": [
      {"text": "按所与类型过滤有效", "tone": "专业"},
      {"text": "筛选某所的指定类型", "tone": "中性"},
      {"text": "只看这所的这种表", "tone": "口语"}
    ],
    "keywords": ["过滤", "filter", "WHERE", "supply_office", "营业所", "meter_type", "类型", "meter_status", "有效", "条件片段", "筛选", "查询拼接", "字段", "约束"],
    "intent_tags": ["filter"]
  },
  {
    "id": "snpt_office_station_dist",
    "aliases": [
      {"text": "所站组合水表数", "tone": "中性"},
      {"text": "各站在各所有多少", "tone": "口语"},
      {"text": "营业所×站点分布", "tone": "专业"}
    ],
    "keywords": ["supply_office", "营业所", "station", "站点", "层级", "hierarchy", "分布", "distribution", "水表数", "meter count", "distinct", "去重", "聚合", "交叉分析", "排行"],
    "intent_tags": ["aggregate", "by_dimension"]
  },
  {
    "id": "snpt_total_meter_baseline",
    "aliases": [
      {"text": "水表总量基线", "tone": "中性"},
      {"text": "现在有多少水表", "tone": "口语"},
      {"text": "全表去重总数", "tone": "专业"}
    ],
    "keywords": ["总量", "total", "baseline", "基线", "水表总数", "meter total", "service_point_id", "distinct", "去重", "分母", "denominator", "占比", "聚合", "汇总", "snapshot"],
    "intent_tags": ["aggregate"]
  }
]
@@ -1,330 +0,0 @@
[
  {
    "id": "snpt_topn_station",
    "desc": "按站点统计水表数量并取前N",
    "type": "topn",
    "title": "站点TopN水表数",
    "examples": ["各站点水表数量排名前10", "站点水表覆盖情况排行"],
    "variables": [{"name": "top_n", "type": "int", "default": 10}],
    "dialect_sql": {
      "mysql": "SELECT station,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY station\nORDER BY meter_cnt DESC\nLIMIT {{top_n}};"
    },
    "applicability": {
      "constraints": {
        "notes": ["TopN建议N<=36", "以service_point_id去重计数", "无时间列,无法做趋势"],
        "fk_join_available": false,
        "dim_cardinality_hint": 36
      },
      "time_column": null,
      "required_columns": ["station", "service_point_id"]
    },
    "business_caliber": "水表数=按service_point_id去重计数;粒度=站点。仅统计当前表中的有效记录(不含时间口径)。安全限制:用于分析排名,避免扩大LIMIT造成全量导出。"
  },
  {
    "id": "snpt_share_district",
    "desc": "统计各辖区水表数及其占比",
    "type": "ratio",
    "title": "辖区水表占比",
    "examples": ["各辖区水表占比", "哪个辖区水表最多"],
    "variables": [],
    "dialect_sql": {
      "mysql": "WITH by_district AS (\n SELECT district, COUNT(DISTINCT service_point_id) AS meter_cnt\n FROM `data-ge`.`water_meter_info`\n GROUP BY district\n), tot AS (\n SELECT COUNT(DISTINCT service_point_id) AS total_cnt\n FROM `data-ge`.`water_meter_info`\n)\nSELECT b.district,\n b.meter_cnt,\n ROUND(b.meter_cnt / NULLIF(t.total_cnt, 0) * 100, 2) AS pct\nFROM by_district b\nCROSS JOIN tot t\nORDER BY pct DESC, b.district;"
    },
    "applicability": {
      "constraints": {
        "notes": ["占比分母为全表service_point_id去重总数", "service_point_id为空将被忽略"],
        "fk_join_available": false,
        "dim_cardinality_hint": 13
      },
      "time_column": null,
      "required_columns": ["district", "service_point_id"]
    },
    "business_caliber": "水表数=按service_point_id去重计数;粒度=辖区。占比=辖区水表数/全表水表总数。安全限制:仅基于本表,不代表全市/全网口径;无时间维度。"
  },
  {
    "id": "snpt_dist_diameter",
    "desc": "按表径统计水表数量分布",
    "type": "aggregate",
    "title": "表径分布统计",
    "examples": ["不同口径水表有多少", "查看表径分布情况"],
    "variables": [],
    "dialect_sql": {
      "mysql": "SELECT meter_diameter,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY meter_diameter\nORDER BY meter_cnt DESC, meter_diameter;"
    },
    "applicability": {
      "constraints": {
        "notes": ["以service_point_id去重计数", "适合绘制条形图/饼图"],
        "fk_join_available": false,
        "dim_cardinality_hint": 8
      },
      "time_column": null,
      "required_columns": ["meter_diameter", "service_point_id"]
    },
    "business_caliber": "水表数=按service_point_id去重计数;粒度=表径。安全限制:仅用于分布分析,不含时间过滤;避免用于明细导出。"
  },
  {
    "id": "snpt_type_subtype_matrix",
    "desc": "统计水表类型与子类组合的数量",
    "type": "aggregate",
    "title": "类型子类分布",
    "examples": ["不同类型与子类的水表数量", "查看类型与子类的组合分布"],
    "variables": [],
    "dialect_sql": {
      "mysql": "SELECT meter_type,\n meter_subtype,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY meter_type, meter_subtype\nORDER BY meter_cnt DESC, meter_type, meter_subtype;"
    },
    "applicability": {
      "constraints": {
        "notes": ["组合基数<=5×9=45", "以service_point_id去重计数"],
        "fk_join_available": false,
        "dim_cardinality_hint": 45
      },
      "time_column": null,
      "required_columns": ["meter_type", "meter_subtype", "service_point_id"]
    },
    "business_caliber": "水表数=按service_point_id去重计数;粒度=类型×子类组合。安全限制:仅用于汇总分析,不包含时间或业务状态变化。"
  },
  {
    "id": "snpt_quality_spid_uniq",
    "desc": "评估service_point_id的空值与重复情况",
    "type": "quality",
    "title": "服务点唯一性检",
    "examples": ["检查服务点ID是否唯一", "统计service_point_id空值与重复情况"],
    "variables": [],
    "dialect_sql": {
      "mysql": "SELECT\n COUNT(*) AS total_rows,\n SUM(service_point_id IS NULL) AS null_cnt,\n COUNT(DISTINCT service_point_id) AS distinct_cnt,\n (COUNT(*) - COUNT(DISTINCT service_point_id)) AS duplicate_rows_est,\n (\n SELECT COUNT(*) FROM (\n SELECT service_point_id\n FROM `data-ge`.`water_meter_info`\n GROUP BY service_point_id\n HAVING COUNT(*) > 1\n ) AS dup\n ) AS dup_key_groups\nFROM `data-ge`.`water_meter_info`;"
    },
    "applicability": {
      "constraints": {
        "notes": ["用于键完整性检查", "重复行估算=总行数-去重数"],
        "fk_join_available": false,
        "dim_cardinality_hint": null
      },
      "time_column": null,
      "required_columns": ["service_point_id"]
    },
    "business_caliber": "质量检查口径:在本表内评估service_point_id的非空与唯一性,不代表跨表全局唯一。安全限制:仅输出汇总指标,不暴露明细重复值。"
  },
  {
    "id": "snpt_quality_account_nulls",
    "desc": "抽取account_id为空的记录用于排查",
    "type": "quality",
|
||||
"title": "账户ID缺失明细",
|
||||
"examples": [
|
||||
"列出account_id为空的水表",
|
||||
"抽样查看账户缺失的数据行"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "limit_n",
|
||||
"type": "int",
|
||||
"default": 50
|
||||
}
|
||||
],
|
||||
"dialect_sql": {
|
||||
"mysql": "SELECT *\nFROM `data-ge`.`water_meter_info`\nWHERE account_id IS NULL\nLIMIT {{limit_n}};"
|
||||
},
|
||||
"applicability": {
|
||||
"constraints": {
|
||||
"notes": [
|
||||
"明细仅限小样本抽取",
|
||||
"建议LIMIT<=100,避免全量导出"
|
||||
],
|
||||
"fk_join_available": false,
|
||||
"dim_cardinality_hint": null
|
||||
},
|
||||
"time_column": null,
|
||||
"required_columns": [
|
||||
"account_id"
|
||||
]
|
||||
},
|
||||
"business_caliber": "质量抽样:筛出账户ID缺失的水表记录,便于核对。安全限制:仅用于样本排查,不建议在生产中全量导出;如需口径统计请改为COUNT聚合。"
|
||||
},
|
||||
{
|
||||
"id": "snpt_sample_random_rows",
|
||||
"desc": "随机抽取水表信息用于人工核验",
|
||||
"type": "sample",
|
||||
"title": "随机抽样明细",
|
||||
"examples": [
|
||||
"抽样查看水表信息",
|
||||
"随机抽取20条做质检"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "sample_size",
|
||||
"type": "int",
|
||||
"default": 20
|
||||
}
|
||||
],
|
||||
"dialect_sql": {
|
||||
"mysql": "SELECT *\nFROM `data-ge`.`water_meter_info`\nORDER BY RAND()\nLIMIT {{sample_size}};"
|
||||
},
|
||||
"applicability": {
|
||||
"constraints": {
|
||||
"notes": [
|
||||
"使用RAND()随机,样本不可复现",
|
||||
"建议限制样本量"
|
||||
],
|
||||
"fk_join_available": false,
|
||||
"dim_cardinality_hint": 300
|
||||
},
|
||||
"time_column": null,
|
||||
"required_columns": [
|
||||
"service_point_id"
|
||||
]
|
||||
},
|
||||
"business_caliber": "样本抽取:从本表随机返回若干行明细。安全限制:避免扩大LIMIT进行全量下载;如需可复现样本,请改用带种子的随机方法(MySQL不原生支持)。"
|
||||
},
|
||||
{
|
||||
"id": "snpt_filter_office_type_where",
|
||||
"desc": "常用WHERE筛选条件片段:按营业所与类型且为有效",
|
||||
"type": "sample",
|
||||
"title": "机构类型筛选片",
|
||||
"examples": [
|
||||
"筛选A营业所的机械表",
|
||||
"仅查看某营业所的指定类型水表"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "supply_office",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "meter_type",
|
||||
"type": "string"
|
||||
}
|
||||
],
|
||||
"dialect_sql": {
|
||||
"mysql": "WHERE supply_office = '{{supply_office}}'\n AND meter_type = '{{meter_type}}'\n AND meter_status = '有效'"
|
||||
},
|
||||
"applicability": {
|
||||
"constraints": {
|
||||
"notes": [
|
||||
"这是条件片段,可拼接到其他查询",
|
||||
"meter_status当前为单一值“有效”"
|
||||
],
|
||||
"fk_join_available": false,
|
||||
"dim_cardinality_hint": 11
|
||||
},
|
||||
"time_column": null,
|
||||
"required_columns": [
|
||||
"supply_office",
|
||||
"meter_type",
|
||||
"meter_status"
|
||||
]
|
||||
},
|
||||
"business_caliber": "过滤口径:仅保留指定营业所与指定水表类型、且状态为“有效”的记录。安全限制:为片段用途,需拼接在SELECT…FROM之后使用。"
|
||||
},
|
||||
{
|
||||
"id": "snpt_office_station_dist",
|
||||
"desc": "按营业所与站点组合统计水表数",
|
||||
"type": "aggregate",
|
||||
"title": "所站层级分布",
|
||||
"examples": [
|
||||
"按营业所查看各站点水表数",
|
||||
"所站两级的水表分布情况"
|
||||
],
|
||||
"variables": [],
|
||||
"dialect_sql": {
|
||||
"mysql": "SELECT supply_office,\n station,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY supply_office, station\nORDER BY supply_office, meter_cnt DESC, station;"
|
||||
},
|
||||
"applicability": {
|
||||
"constraints": {
|
||||
"notes": [
|
||||
"组合基数<=11×36=396",
|
||||
"以service_point_id去重计数",
|
||||
"如结果过长可再按TopN筛选"
|
||||
],
|
||||
"fk_join_available": false,
|
||||
"dim_cardinality_hint": 396
|
||||
},
|
||||
"time_column": null,
|
||||
"required_columns": [
|
||||
"supply_office",
|
||||
"station",
|
||||
"service_point_id"
|
||||
]
|
||||
},
|
||||
"business_caliber": "水表数=按service_point_id去重计数;粒度=营业所×站点。安全限制:结果行数可能较多,建议在可视化端增加筛选或分页。"
|
||||
},
|
||||
{
|
||||
"id": "snpt_total_meter_baseline",
|
||||
"desc": "获取全表水表去重总量基线",
|
||||
"type": "aggregate",
|
||||
"title": "水表总量基线",
|
||||
"examples": [
|
||||
"当前有多少只水表",
|
||||
"作为占比分析的分母基线"
|
||||
],
|
||||
"variables": [],
|
||||
"dialect_sql": {
|
||||
"mysql": "SELECT COUNT(DISTINCT service_point_id) AS meter_total\nFROM `data-ge`.`water_meter_info`;"
|
||||
},
|
||||
"applicability": {
|
||||
"constraints": {
|
||||
"notes": [
|
||||
"作为其他占比/分摊分母基线",
|
||||
"忽略service_point_id为空的记录"
|
||||
],
|
||||
"fk_join_available": false,
|
||||
"dim_cardinality_hint": 300
|
||||
},
|
||||
"time_column": null,
|
||||
"required_columns": [
|
||||
"service_point_id"
|
||||
]
|
||||
},
|
||||
"business_caliber": "水表总量=按service_point_id去重计数;基于当前表的全量记录。安全限制:无时间维度,无法反映存量随时间变化。"
|
||||
}
|
||||
]
|
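Each snippet carries `variables` with defaults and a `dialect_sql` template using `{{var}}` placeholders. A minimal sketch of rendering such a template, assuming simple string substitution and the "TopN建议N<=36" clamping note above; the function names are illustrative, not this repo's actual code:

```python
# Sketch: render a snippet's dialect_sql by substituting {{var}} placeholders
# with defaults from the snippet's "variables" list, plus caller overrides.
import re


def render_snippet(snippet: dict, dialect: str = "mysql", **overrides) -> str:
    values = {v["name"]: v.get("default") for v in snippet.get("variables", [])}
    values.update(overrides)
    sql = snippet["dialect_sql"][dialect]

    def sub(match: re.Match) -> str:
        name = match.group(1)
        if values.get(name) is None:
            raise ValueError(f"missing value for variable {name!r}")
        return str(values[name])

    return re.sub(r"\{\{(\w+)\}\}", sub, sql)


snippet = {
    "variables": [{"name": "top_n", "type": "int", "default": 10}],
    "dialect_sql": {"mysql": "SELECT station, COUNT(DISTINCT service_point_id) AS meter_cnt\n"
                             "FROM `data-ge`.`water_meter_info`\n"
                             "GROUP BY station ORDER BY meter_cnt DESC LIMIT {{top_n}};"},
}
# Clamp per the "TopN建议N<=36" note above before rendering.
print(render_snippet(snippet, top_n=min(50, 36)))
```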
@ -1,415 +0,0 @@
{
  "role": "dimension",
  "time": { "range": null, "column": null, "has_gaps": null, "granularity": "unknown" },
  "grain": ["account_id", "service_point_id"],
  "table": "data-ge.water_meter_info",
  "columns": [
    {
      "name": "supply_office",
      "dtype": "string",
      "stats": {},
      "comment": "供水管理所名称,枚举值",
      "enumish": true,
      "null_rate": 0.0,
      "top_values": [
        { "pct": null, "value": "宝山供水管理所" },
        { "pct": null, "value": "黄浦供水管理所" },
        { "pct": null, "value": "青东供水管理所" },
        { "pct": null, "value": "虹口供水管理所" },
        { "pct": null, "value": "闸北供水管理所" },
        { "pct": null, "value": "松北供水管理所" },
        { "pct": null, "value": "杨浦供水管理所" },
        { "pct": null, "value": "长宁供水管理所" },
        { "pct": null, "value": "闵行供水管理所" },
        { "pct": null, "value": "徐汇供水管理所" },
        { "pct": null, "value": "普陀供水管理所" }
      ],
      "semantic_type": "dimension",
      "distinct_count": 11,
      "distinct_ratio": 0.03666666666666667,
      "pk_candidate_score": 0.11,
      "metric_candidate_score": 0.0
    },
    {
      "name": "station",
      "dtype": "string",
      "stats": {},
      "comment": "站点名称,枚举值",
      "enumish": true,
      "null_rate": 0.0,
      "top_values": [
        { "pct": null, "value": "新闸站" },
        { "pct": null, "value": "宝杨站" },
        { "pct": null, "value": "江川站" },
        { "pct": null, "value": "长江站" },
        { "pct": null, "value": "市光站" },
        { "pct": null, "value": "徐泾站" },
        { "pct": null, "value": "真北站" },
        { "pct": null, "value": "半淞园站" },
        { "pct": null, "value": "芙蓉江站" },
        { "pct": null, "value": "密云站" }
      ],
      "semantic_type": "dimension",
      "distinct_count": 36,
      "distinct_ratio": 0.12,
      "pk_candidate_score": 0.36,
      "metric_candidate_score": 0.0
    },
    {
      "name": "district",
      "dtype": "string",
      "stats": {},
      "comment": "行政区划名称,枚举值",
      "enumish": true,
      "null_rate": 0.0,
      "top_values": [
        { "pct": null, "value": "普陀区" },
        { "pct": null, "value": "闵行区" },
        { "pct": null, "value": "嘉定区" },
        { "pct": null, "value": "杨浦区" },
        { "pct": null, "value": "徐汇区" },
        { "pct": null, "value": "黄浦区" },
        { "pct": null, "value": "松江区" },
        { "pct": null, "value": "长宁区" },
        { "pct": null, "value": "青浦区" },
        { "pct": null, "value": "虹口区" }
      ],
      "semantic_type": "dimension",
      "distinct_count": 13,
      "distinct_ratio": 0.043333333333333335,
      "pk_candidate_score": 0.13,
      "metric_candidate_score": 0.0
    },
    {
      "name": "meter_diameter",
      "dtype": "string",
      "stats": {},
      "comment": "水表直径规格,枚举值",
      "enumish": true,
      "null_rate": 0.0,
      "top_values": [
        { "pct": null, "value": "20mm" },
        { "pct": null, "value": "15mm" },
        { "pct": null, "value": "25mm" },
        { "pct": null, "value": "40mm" },
        { "pct": null, "value": "150mm" },
        { "pct": null, "value": "100mm" },
        { "pct": null, "value": "80mm" },
        { "pct": null, "value": "50mm" }
      ],
      "semantic_type": "dimension",
      "distinct_count": 8,
      "distinct_ratio": 0.02666666666666667,
      "pk_candidate_score": 0.08,
      "metric_candidate_score": 0.0
    },
    {
      "name": "meter_status",
      "dtype": "string",
      "stats": {},
      "comment": "水表状态,枚举值",
      "enumish": true,
      "null_rate": 0.0,
      "top_values": [
        { "pct": null, "value": "有效" }
      ],
      "semantic_type": "dimension",
      "distinct_count": 1,
      "distinct_ratio": 0.0033333333333333335,
      "pk_candidate_score": 0.01,
      "metric_candidate_score": 0.0
    },
    {
      "name": "meter_subtype",
      "dtype": "string",
      "stats": {},
      "comment": "水表子类型,枚举值",
      "enumish": true,
      "null_rate": 0.0,
      "top_values": [
        { "pct": null, "value": "旋翼半液封式" },
        { "pct": null, "value": "超声波式" },
        { "pct": null, "value": "旋翼湿式(指针式)" },
        { "pct": null, "value": "旋翼湿式(数字指针式)" },
        { "pct": null, "value": "电磁式" },
        { "pct": null, "value": "无直管段要求超声波式" },
        { "pct": null, "value": "无直管段要求电磁式" },
        { "pct": null, "value": "垂直螺翼干式" },
        { "pct": null, "value": "机械容积式" }
      ],
      "semantic_type": "dimension",
      "distinct_count": 9,
      "distinct_ratio": 0.03,
      "pk_candidate_score": 0.09,
      "metric_candidate_score": 0.0
    },
    {
      "name": "meter_type",
      "dtype": "string",
      "stats": {},
      "comment": "水表类型,枚举值",
      "enumish": true,
      "null_rate": 0.0,
      "top_values": [
        { "pct": null, "value": "容积式机械水表" },
        { "pct": null, "value": "速度式机械水表" },
        { "pct": null, "value": "电磁式远传水表" },
        { "pct": null, "value": "速度式机电远传水表" },
        { "pct": null, "value": "超声波式远传水表" }
      ],
      "semantic_type": "dimension",
      "distinct_count": 5,
      "distinct_ratio": 0.016666666666666666,
      "pk_candidate_score": 0.05,
      "metric_candidate_score": 0.0
    },
    {
      "name": "installation_position",
      "dtype": "string",
      "stats": {},
      "comment": "安装位置,枚举值",
      "enumish": true,
      "null_rate": 0.0,
      "top_values": [
        { "pct": null, "value": "嵌墙表" },
        { "pct": null, "value": "管道井表" },
        { "pct": null, "value": "地下表" },
        { "pct": null, "value": "龙头表" }
      ],
      "semantic_type": "dimension",
      "distinct_count": 4,
      "distinct_ratio": 0.013333333333333334,
      "pk_candidate_score": 0.04,
      "metric_candidate_score": 0.0
    },
    {
      "name": "account_id",
      "dtype": "string",
      "stats": {},
      "comment": "账户ID",
      "enumish": false,
      "null_rate": null,
      "top_values": [],
      "semantic_type": "id",
      "distinct_count": null,
      "distinct_ratio": null,
      "pk_candidate_score": 0.95,
      "metric_candidate_score": 0.0
    },
    {
      "name": "service_point_id",
      "dtype": "string",
      "stats": {},
      "comment": "服务点ID",
      "enumish": false,
      "null_rate": null,
      "top_values": [],
      "semantic_type": "id",
      "distinct_count": null,
      "distinct_ratio": null,
      "pk_candidate_score": 0.95,
      "metric_candidate_score": 0.0
    }
  ],
  "quality": { "warning_hints": [], "failed_expectations": [] },
  "row_count": 300,
  "fk_candidates": [],
  "confidence_notes": [
    "role判定为dimension,因所有列均为枚举或ID类型,无metric列",
    "grain依据account_id和service_point_id为唯一标识推测",
    "未发现时间列,因此time字段为null"
  ],
  "primary_key_candidates": [
    ["account_id"],
    ["service_point_id"]
  ]
}
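A sketch of how the per-column fields in this profile could be derived from raw stats. `distinct_ratio = distinct_count / row_count` reproduces the values above exactly; the `pk_candidate_score` formula is an assumption (the numbers above are consistent with `distinct_count / 100` at `row_count = 300`), and the enum threshold is illustrative:

```python
# Sketch: derive profile fields for one column from raw counts.
def profile_column(name: str, distinct_count: int, row_count: int,
                   null_rate: float, enum_threshold: float = 0.2) -> dict:
    ratio = distinct_count / row_count
    return {
        "name": name,
        "enumish": ratio <= enum_threshold,     # low-cardinality => enum-like
        "null_rate": null_rate,
        "distinct_count": distinct_count,
        "distinct_ratio": ratio,
        # Assumed formula; matches the scores in this file at row_count=300.
        "pk_candidate_score": round(distinct_count / 100, 2),
    }


print(profile_column("station", distinct_count=36, row_count=300, null_rate=0.0))
# -> distinct_ratio 0.12, pk_candidate_score 0.36, as in the profile above
```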
@ -1,286 +0,0 @@
[
  {
    "id": "snpt_water_meter_top_supply_office",
    "aliases": [
      { "text": "供水所水表排行", "tone": "中性" },
      { "text": "哪个供水所水表最多", "tone": "口语" },
      { "text": "供水管理所水表TopN统计", "tone": "专业" }
    ],
    "keywords": ["水表", "供水管理所", "排行", "TopN", "数量", "统计", "count", "排名", "前N", "供水所", "水表数", "维度聚合", "by_dimension", "topn"],
    "intent_tags": ["topn", "by_dimension"]
  },
  {
    "id": "snpt_water_meter_top_station",
    "aliases": [
      { "text": "站点水表数量排行", "tone": "中性" },
      { "text": "哪个站点水表最多", "tone": "口语" },
      { "text": "站点维度水表TopN分析", "tone": "专业" }
    ],
    "keywords": ["水表", "站点", "排行", "TopN", "数量", "统计", "count", "排名", "前N", "站点数", "维度聚合", "by_dimension", "topn"],
    "intent_tags": ["topn", "by_dimension"]
  },
  {
    "id": "snpt_water_meter_top_district",
    "aliases": [
      { "text": "区域水表数量排名", "tone": "中性" },
      { "text": "哪个区水表最多", "tone": "口语" },
      { "text": "行政区水表TopN统计", "tone": "专业" }
    ],
    "keywords": ["水表", "区域", "行政区", "排行", "TopN", "数量", "统计", "count", "排名", "前N", "区", "水表数", "维度聚合", "by_dimension", "topn"],
    "intent_tags": ["topn", "by_dimension"]
  },
  {
    "id": "snpt_water_meter_share_by_type",
    "aliases": [
      { "text": "水表类型占比", "tone": "中性" },
      { "text": "哪种水表用得最多", "tone": "口语" },
      { "text": "水表类型分布比例", "tone": "专业" }
    ],
    "keywords": ["水表", "类型", "占比", "比例", "ratio", "分布", "meter_type", "百分比", "分类统计", "水表类型", "aggregate", "by_dimension"],
    "intent_tags": ["ratio", "by_dimension"]
  },
  {
    "id": "snpt_water_meter_subtype_distribution",
    "aliases": [
      { "text": "水表子类型分布", "tone": "中性" },
      { "text": "各种子类型水表情况", "tone": "口语" },
      { "text": "水表子类型计数与占比", "tone": "专业" }
    ],
    "keywords": ["水表", "子类型", "分布", "数量", "占比", "meter_subtype", "统计", "count", "百分比", "分类统计", "aggregate", "by_dimension"],
    "intent_tags": ["aggregate", "by_dimension"]
  },
  {
    "id": "snpt_water_meter_installation_position_stats",
    "aliases": [
      { "text": "安装位置统计", "tone": "中性" },
      { "text": "哪种位置装表最多", "tone": "口语" },
      { "text": "水表安装位置分布", "tone": "专业" }
    ],
    "keywords": ["水表", "安装位置", "统计", "分布", "installation_position", "数量", "count", "位置", "安装点", "aggregate", "by_dimension"],
    "intent_tags": ["aggregate", "by_dimension"]
  },
  {
    "id": "snpt_water_meter_grain_check",
    "aliases": [
      { "text": "主键粒度校验", "tone": "中性" },
      { "text": "数据有没有重复", "tone": "口语" },
      { "text": "数据粒度一致性检查", "tone": "专业" }
    ],
    "keywords": ["主键", "粒度", "校验", "质量", "quality", "重复", "唯一性", "account_id", "service_point_id", "数据校验", "质量检查", "异常检测"],
    "intent_tags": ["quality"]
  },
  {
    "id": "snpt_water_meter_sample_records",
    "aliases": [
      { "text": "水表数据抽样", "tone": "中性" },
      { "text": "给我看点水表数据", "tone": "口语" },
      { "text": "水表记录样本抽取", "tone": "专业" }
    ],
    "keywords": ["水表", "样本", "抽样", "sample", "随机", "记录", "抽查", "limit", "数据结构", "数据示例", "limit_rows"],
    "intent_tags": ["sample"]
  }
]
@ -1,235 +0,0 @@
[
  {
    "id": "snpt_water_meter_top_supply_office",
    "desc": "统计各供水管理所下辖水表数量并排序",
    "type": "topn",
    "title": "供水管理所水表数量排行",
    "examples": ["列出水表最多的前10个供水管理所", "各供水所水表数量排名"],
    "variables": [{ "name": "top_n", "type": "int", "default": 10 }],
    "dialect_sql": { "mysql": "SELECT supply_office AS dim_value, COUNT(*) AS metric_value FROM `data-ge`.`water_meter_info` GROUP BY supply_office ORDER BY metric_value DESC LIMIT {{top_n}}" },
    "applicability": {
      "constraints": {
        "notes": [],
        "fk_join_available": false,
        "dim_cardinality_hint": 11
      },
      "time_column": "nullable",
      "required_columns": ["supply_office"]
    },
    "business_caliber": "按供水管理所维度聚合水表总数,粒度=供水管理所"
  },
  {
    "id": "snpt_water_meter_top_station",
    "desc": "统计各个站点下辖水表数量并排序",
    "type": "topn",
    "title": "站点水表数量排行",
    "examples": ["列出水表最多的前10个站点", "各站点水表数量排名"],
    "variables": [{ "name": "top_n", "type": "int", "default": 10 }],
    "dialect_sql": { "mysql": "SELECT station AS dim_value, COUNT(*) AS metric_value FROM `data-ge`.`water_meter_info` GROUP BY station ORDER BY metric_value DESC LIMIT {{top_n}}" },
    "applicability": {
      "constraints": {
        "notes": ["高基数维度建议LIMIT<=50"],
        "fk_join_available": false,
        "dim_cardinality_hint": 36
      },
      "time_column": "nullable",
      "required_columns": ["station"]
    },
    "business_caliber": "按站点维度聚合水表总数,粒度=站点"
  },
  {
    "id": "snpt_water_meter_top_district",
    "desc": "统计各区水表数量并排序",
    "type": "topn",
    "title": "区域水表数量排行",
    "examples": ["列出各区水表数量排名", "哪个区的水表最多?"],
    "variables": [{ "name": "top_n", "type": "int", "default": 10 }],
    "dialect_sql": { "mysql": "SELECT district AS dim_value, COUNT(*) AS metric_value FROM `data-ge`.`water_meter_info` GROUP BY district ORDER BY metric_value DESC LIMIT {{top_n}}" },
    "applicability": {
      "constraints": {
        "notes": [],
        "fk_join_available": false,
        "dim_cardinality_hint": 13
      },
      "time_column": "nullable",
      "required_columns": ["district"]
    },
    "business_caliber": "按行政区划维度聚合水表总数,粒度=区"
  },
  {
    "id": "snpt_water_meter_share_by_type",
    "desc": "计算各类水表占总水表的比例",
    "type": "ratio",
    "title": "水表类型占比分布",
    "examples": ["各类水表占比是多少?", "哪种类型的水表使用最广泛?"],
    "variables": [],
    "dialect_sql": { "mysql": "SELECT meter_type AS dim_value, COUNT(*) * 100.0 / (SELECT COUNT(*) FROM `data-ge`.`water_meter_info`) AS ratio_percent FROM `data-ge`.`water_meter_info` GROUP BY meter_type ORDER BY ratio_percent DESC" },
    "applicability": {
      "constraints": {
        "notes": [],
        "fk_join_available": false,
        "dim_cardinality_hint": 5
      },
      "time_column": "nullable",
      "required_columns": ["meter_type"]
    },
    "business_caliber": "按水表类型分类计算其占比,粒度=水表类型"
  },
  {
    "id": "snpt_water_meter_subtype_distribution",
    "desc": "展示不同水表子类型的数量及比例",
    "type": "aggregate",
    "title": "水表子类型分布情况",
    "examples": ["各种子类型水表的数量和占比", "哪种子类型水表最多?"],
    "variables": [],
    "dialect_sql": { "mysql": "SELECT meter_subtype AS dim_value, COUNT(*) AS count_value, ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM `data-ge`.`water_meter_info`), 2) AS percentage FROM `data-ge`.`water_meter_info` GROUP BY meter_subtype ORDER BY count_value DESC" },
    "applicability": {
      "constraints": {
        "notes": [],
        "fk_join_available": false,
        "dim_cardinality_hint": 9
      },
      "time_column": "nullable",
      "required_columns": ["meter_subtype"]
    },
    "business_caliber": "按水表子类型进行计数和百分比统计,粒度=水表子类型"
  },
  {
    "id": "snpt_water_meter_installation_position_stats",
    "desc": "统计不同安装位置下的水表数量",
    "type": "aggregate",
    "title": "安装位置分布统计",
    "examples": ["各种安装位置的水表数量", "哪种安装位置最为常见?"],
    "variables": [],
    "dialect_sql": { "mysql": "SELECT installation_position AS dim_value, COUNT(*) AS count_value FROM `data-ge`.`water_meter_info` GROUP BY installation_position ORDER BY count_value DESC" },
    "applicability": {
      "constraints": {
        "notes": [],
        "fk_join_available": false,
        "dim_cardinality_hint": 4
      },
      "time_column": "nullable",
      "required_columns": ["installation_position"]
    },
    "business_caliber": "按安装位置对水表进行分组计数,粒度=安装位置"
  },
  {
    "id": "snpt_water_meter_grain_check",
    "desc": "验证 account_id 和 service_point_id 是否构成唯一组合",
    "type": "quality",
    "title": "主键粒度校验",
    "examples": ["这张表的数据粒度是否正确?", "是否存在重复的服务点记录?"],
    "variables": [],
    "dialect_sql": { "mysql": "SELECT IF(COUNT(*) = COUNT(DISTINCT account_id, service_point_id), 'PASS', 'FAIL') AS grain_check_result FROM `data-ge`.`water_meter_info`" },
    "applicability": {
      "constraints": {
        "notes": [],
        "fk_join_available": false,
        "dim_cardinality_hint": null
      },
      "time_column": "nullable",
      "required_columns": ["account_id", "service_point_id"]
    },
    "business_caliber": "检验数据是否符合预期的主键粒度(account_id + service_point_id)"
  },
  {
    "id": "snpt_water_meter_sample_records",
    "desc": "随机抽取部分水表信息用于查看结构",
    "type": "sample",
    "title": "样本抽取",
    "examples": ["给我看几条水表数据的例子", "抽查一些原始数据看看格式"],
    "variables": [{ "name": "limit_rows", "type": "int", "default": 5 }],
    "dialect_sql": { "mysql": "SELECT * FROM `data-ge`.`water_meter_info` ORDER BY RAND() LIMIT {{limit_rows}}" },
    "applicability": {
      "constraints": {
        "notes": [],
        "fk_join_available": false,
        "dim_cardinality_hint": null
      },
      "time_column": "nullable",
      "required_columns": []
    },
    "business_caliber": "从全量数据中随机采样若干条记录供参考"
  }
]
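Each snippet also declares `applicability.required_columns`, so it can be gated on the target table before being offered. A minimal sketch of that check; the hard-coded column set stands in for whatever schema lookup the real service uses:

```python
# Sketch: offer a snippet only when its required columns exist in the table.
def snippet_applies(snippet: dict, table_columns: set[str]) -> bool:
    required = set(snippet["applicability"]["required_columns"])
    return required <= table_columns   # every required column must exist


# Stand-in for a schema lookup; these are the columns profiled above.
columns = {"supply_office", "station", "district", "meter_diameter",
           "meter_status", "meter_subtype", "meter_type",
           "installation_position", "account_id", "service_point_id"}
snippet = {"applicability": {"required_columns": ["station", "service_point_id"]}}
print(snippet_applies(snippet, columns))  # True
```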
File diff suppressed because one or more lines are too long
@ -1,249 +0,0 @@
[
  {
    "id": "snpt_topn_supply_office_by_account",
    "aliases": [
      { "text": "哪个供水所用户最多?", "tone": "口语" },
      { "text": "按供应办公室统计账户数量", "tone": "中性" },
      { "text": "供应办公室账户数TopN排名", "tone": "专业" }
    ],
    "keywords": ["供应办公室", "账户数", "TopN", "排行", "统计", "account_id", "supply_office", "去重", "高占比", "维度分析", "by_dimension", "aggregate", "topn"],
    "intent_tags": ["topn", "aggregate", "by_dimension"]
  },
  {
    "id": "snpt_topn_station_by_account",
    "aliases": [
      { "text": "哪些站点用户最多?", "tone": "口语" },
      { "text": "按站点统计账户分布", "tone": "中性" },
      { "text": "站点账户数Top20排名", "tone": "专业" }
    ],
    "keywords": ["站点", "账户数", "TopN", "排行", "统计", "station", "account_id", "去重", "高负载", "维度分析", "by_dimension", "aggregate", "topn"],
    "intent_tags": ["topn", "aggregate", "by_dimension"]
  },
  {
    "id": "snpt_topn_district_by_account",
    "aliases": [
      { "text": "哪个区用户最多?", "tone": "口语" },
      { "text": "按行政区统计账户数量", "tone": "中性" },
      { "text": "行政区账户数全量排名", "tone": "专业" }
    ],
    "keywords": ["行政区", "账户数", "TopN", "排行", "统计", "district", "account_id", "去重", "区域对比", "维度分析", "by_dimension", "aggregate", "topn"],
    "intent_tags": ["topn", "aggregate", "by_dimension"]
  },
  {
    "id": "snpt_share_of_meter_type",
    "aliases": [
      { "text": "各类水表占多少比例?", "tone": "口语" },
      { "text": "水表类型占比分析", "tone": "中性" },
      { "text": "水表类型占比分布", "tone": "专业" }
    ],
    "keywords": ["水表类型", "占比", "比例", "meter_type", "account_id", "去重", "分布", "主流类型", "技术选型", "ratio", "aggregate", "by_dimension"],
    "intent_tags": ["ratio", "aggregate", "by_dimension"]
  },
  {
    "id": "snpt_sample_account_service_point",
    "aliases": [
      { "text": "随机看10条账户信息", "tone": "口语" },
      { "text": "抽样账户与服务点明细", "tone": "中性" },
      { "text": "账户-服务点随机抽样验证", "tone": "专业" }
    ],
    "keywords": ["抽样", "随机", "样本", "account_id", "service_point_id", "数据质量", "验证", "唯一性", "格式检查", "sample", "quality"],
    "intent_tags": ["sample", "quality"]
  },
  {
    "id": "snpt_filter_meter_status_valid",
    "aliases": [
      { "text": "只取有效的水表记录", "tone": "口语" },
      { "text": "筛选有效水表记录", "tone": "中性" },
      { "text": "水表状态有效性过滤", "tone": "专业" }
    ],
    "keywords": ["有效", "过滤", "筛选", "meter_status", "质量检查", "断言", "清洗", "filter", "quality"],
    "intent_tags": ["filter", "quality"]
  },
  {
    "id": "snpt_filter_meter_diameter_20mm",
    "aliases": [
      { "text": "找出所有20mm水表用户", "tone": "口语" },
      { "text": "筛选20mm水表记录", "tone": "中性" },
      { "text": "20mm口径水表子集提取", "tone": "专业" }
    ],
    "keywords": ["20mm", "水表直径", "过滤", "筛选", "meter_diameter", "子集", "分析", "住宅用水", "规格", "filter", "by_dimension"],
    "intent_tags": ["filter", "by_dimension"]
  }
]
@ -1,227 +0,0 @@
[
  {
    "id": "snpt_topn_supply_office_by_account",
    "desc": "统计各供应办公室对应的账户数量,识别高占比管理所",
    "type": "topn",
    "title": "按供应办公室统计账户数",
    "examples": ["哪个供水管理所服务的用户最多?", "列出前5个账户数最多的供应办公室"],
    "variables": [{ "name": "top_n", "type": "int", "default": 11 }],
    "dialect_sql": { "mysql": "SELECT supply_office, COUNT(DISTINCT account_id) AS account_count\nFROM water_meter_info\nGROUP BY supply_office\nORDER BY account_count DESC\nLIMIT {{top_n}};" },
    "applicability": {
      "constraints": {
        "notes": ["供应办公室仅11个唯一值,可安全展示全部;建议LIMIT 11避免冗余排序"],
        "fk_join_available": false,
        "dim_cardinality_hint": 11
      },
      "time_column": "nullable",
      "required_columns": ["supply_office", "account_id"]
    },
    "business_caliber": "粒度=供应办公室,指标=去重账户数(account_id),仅统计水表信息表中有效账户,不关联外部表"
  },
  {
    "id": "snpt_topn_station_by_account",
    "desc": "统计各站点服务的账户数量,识别高负载站点",
    "type": "topn",
    "title": "按站点统计账户分布",
    "examples": ["哪些站点服务的用户最多?", "TOP10用户最多的站点是哪些?"],
    "variables": [{ "name": "top_n", "type": "int", "default": 20 }],
    "dialect_sql": { "mysql": "SELECT station, COUNT(DISTINCT account_id) AS account_count\nFROM water_meter_info\nGROUP BY station\nORDER BY account_count DESC\nLIMIT {{top_n}};" },
    "applicability": {
      "constraints": {
        "notes": ["站点有36个唯一值,建议LIMIT<=20以避免结果过长;高基数维度可能影响查询性能"],
        "fk_join_available": false,
        "dim_cardinality_hint": 36
      },
      "time_column": "nullable",
      "required_columns": ["station", "account_id"]
    },
    "business_caliber": "粒度=站点(station),指标=去重账户数(account_id),基于水表信息表直接聚合,不涉及时间维度"
  },
  {
    "id": "snpt_topn_district_by_account",
    "desc": "统计各行政区的账户数量,辅助区域资源分配分析",
    "type": "topn",
    "title": "按行政区统计账户分布",
    "examples": ["哪个区的用水账户最多?", "列出所有行政区的账户数量排名"],
    "variables": [{ "name": "top_n", "type": "int", "default": 13 }],
    "dialect_sql": { "mysql": "SELECT district, COUNT(DISTINCT account_id) AS account_count\nFROM water_meter_info\nGROUP BY district\nORDER BY account_count DESC\nLIMIT {{top_n}};" },
    "applicability": {
      "constraints": {
        "notes": ["行政区共13个,可完整展示;适合用于区域对比分析"],
        "fk_join_available": false,
        "dim_cardinality_hint": 13
      },
      "time_column": "nullable",
      "required_columns": ["district", "account_id"]
    },
    "business_caliber": "粒度=行政区(district),指标=去重账户数(account_id),基于水表信息表聚合,反映各区域用户规模"
  },
  {
    "id": "snpt_share_of_meter_type",
    "desc": "计算各类水表类型在总账户中的占比,识别主流类型",
    "type": "ratio",
    "title": "水表类型占比分析",
    "examples": ["各类水表在用户中的占比是多少?", "电磁式远传水表占总用户比例多少?"],
    "variables": [],
    "dialect_sql": { "mysql": "SELECT meter_type, \n COUNT(DISTINCT account_id) AS account_count,\n ROUND(COUNT(DISTINCT account_id) * 100.0 / SUM(COUNT(DISTINCT account_id)) OVER (), 2) AS percentage\nFROM water_meter_info\nGROUP BY meter_type\nORDER BY account_count DESC;" },
    "applicability": {
      "constraints": {
        "notes": ["水表类型仅5种,适合计算占比;可直接展示全量分布"],
        "fk_join_available": false,
        "dim_cardinality_hint": 5
      },
      "time_column": "nullable",
      "required_columns": ["meter_type", "account_id"]
    },
    "business_caliber": "粒度=水表类型(meter_type),指标=去重账户数占比,分母为全表去重账户总数,反映技术选型分布"
  },
  {
    "id": "snpt_sample_account_service_point",
    "desc": "随机抽取部分账户与服务点ID的原始记录,用于数据质量核查",
    "type": "sample",
    "title": "抽样账户与服务点明细",
    "examples": ["随机查看10条账户与服务点的详细信息", "抽样检查水表信息是否符合预期格式"],
    "variables": [{ "name": "sample_size", "type": "int", "default": 10 }],
    "dialect_sql": { "mysql": "SELECT account_id, service_point_id, supply_office, station, district, meter_diameter, meter_type, meter_subtype, installation_position\nFROM water_meter_info\nORDER BY RAND()\nLIMIT {{sample_size}};" },
    "applicability": {
      "constraints": {
        "notes": ["主键组合为account_id+service_point_id,适合抽样验证唯一性;建议样本量≤100"],
        "fk_join_available": false,
        "dim_cardinality_hint": null
      },
      "time_column": "nullable",
      "required_columns": ["account_id", "service_point_id"]
    },
    "business_caliber": "粒度=单条水表记录,抽取样本用于验证account_id与service_point_id的组合唯一性及维度字段完整性"
  },
  {
    "id": "snpt_filter_meter_status_valid",
    "desc": "过滤出水表状态为'有效'的记录,用于后续分析",
    "type": "quality",
    "title": "筛选有效水表记录",
    "examples": ["只取状态为有效的水表记录", "确认所有水表是否均为有效状态"],
    "variables": [],
    "dialect_sql": { "mysql": "SELECT *\nFROM water_meter_info\nWHERE meter_status = '有效';" },
    "applicability": {
      "constraints": {
        "notes": ["meter_status仅存在'有效'值,此条件恒成立;可用于数据清洗流程的显式过滤"],
        "fk_join_available": false,
        "dim_cardinality_hint": 1
      },
      "time_column": "nullable",
      "required_columns": ["meter_status"]
    },
    "business_caliber": "仅保留水表状态为'有效'的记录,因全表均为有效值,此过滤为冗余但可作为数据质量校验的显式断言"
  },
  {
    "id": "snpt_filter_meter_diameter_20mm",
    "desc": "筛选水表直径为20mm的记录,用于特定口径设备分析",
    "type": "quality",
    "title": "筛选20mm水表记录",
    "examples": ["找出所有使用20mm水表的用户", "20mm水表分布在哪些站点?"],
    "variables": [],
    "dialect_sql": { "mysql": "SELECT *\nFROM water_meter_info\nWHERE meter_diameter = '20mm';" },
    "applicability": {
      "constraints": {
        "notes": ["水表直径共8种枚举值,20mm为常见规格;可作为子集分析的起点"],
        "fk_join_available": false,
        "dim_cardinality_hint": 8
      },
      "time_column": "nullable",
      "required_columns": ["meter_diameter"]
    },
    "business_caliber": "粒度=单条水表记录,筛选条件为meter_diameter='20mm',用于分析标准住宅用水表的分布特征"
  }
]
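A sketch of executing a rendered snippet against MySQL with SQLAlchemy. The DSN is a placeholder; the service would use its own engine helper. Note the `:top_n` bound parameter: unlike the `{{var}}` string substitution in the templates above, binding avoids SQL injection for user-supplied values.

```python
# Sketch: run a snippet query with a bound parameter via SQLAlchemy.
from sqlalchemy import create_engine, text

# Placeholder DSN; credentials and host are assumptions.
engine = create_engine("mysql+pymysql://user:pass@localhost:3306/data-ge")

sql = text(
    "SELECT district, COUNT(DISTINCT account_id) AS account_count "
    "FROM water_meter_info GROUP BY district "
    "ORDER BY account_count DESC LIMIT :top_n"
)

with engine.connect() as conn:
    for row in conn.execute(sql, {"top_n": 13}):
        print(row.district, row.account_count)
```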
@ -1,57 +0,0 @@
# Add a RAG entry
curl --location --request POST 'http://127.0.0.1:8000/rag/add' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer ' \
  --data-raw '{
    "id": 0,
    "workspaceId": 0,
    "name": "string",
    "embeddingData": "string",
    "type": "METRIC"
  }'

# Batch-add RAG entries
curl --location --request POST 'http://127.0.0.1:8000/rag/addBatch' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer ' \
  --data-raw '[
    {
      "id": 0,
      "workspaceId": 0,
      "name": "string",
      "embeddingData": "string",
      "type": "METRIC"
    }
  ]'

# Update a RAG entry
curl --location --request POST 'http://127.0.0.1:8000/rag/update' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer ' \
  --data-raw '{
    "id": 0,
    "workspaceId": 0,
    "name": "string",
    "embeddingData": "string",
    "type": "METRIC"
  }'

# Delete a RAG entry
curl --location --request POST 'http://127.0.0.1:8000/rag/delete' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer ' \
  --data-raw '{
    "id": 0,
    "type": "METRIC"
  }'

# Retrieve RAG entries
curl --location --request POST 'http://127.0.0.1:8000/rag/retrieve' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer ' \
  --data-raw '{
    "query": "string",
    "num": 0,
    "workspaceId": 0,
    "type": "METRIC"
  }'
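The same retrieve call from Python, mirroring the curl example above. httpx is an assumption (any HTTP client works), and the empty Bearer token is the same placeholder as in the curl commands:

```python
# Sketch: call /rag/retrieve from Python.
import httpx

resp = httpx.post(
    "http://127.0.0.1:8000/rag/retrieve",
    headers={"Authorization": "Bearer "},  # placeholder token, as above
    json={"query": "水表总量", "num": 5, "workspaceId": 0, "type": "METRIC"},
    timeout=30,
)
resp.raise_for_status()
print(resp.json())
```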
49
doc/会话api.md
@ -1,49 +0,0 @@
# Create a session
curl -X POST "/api/v1/chat/sessions" \
  -H "Content-Type: application/json" \
  -d "{\"user_id\": $CHAT_USER_ID}"

# Get a session
curl "/api/v1/chat/sessions/{session_id}"

# List sessions by user
curl "/api/v1/chat/sessions?user_id=$CHAT_USER_ID"

# Update session status
curl -X POST "/api/v1/chat/sessions/{session_id}/update" \
  -H "Content-Type: application/json" \
  -d '{"status":"PAUSED"}'

# Close a session
curl -X POST "/api/v1/chat/sessions/{session_id}/close"

# Create a chat turn
curl -X POST "/api/v1/chat/sessions/{session_id}/turns" \
  -H "Content-Type: application/json" \
  -d '{
    "user_id": '"$CHAT_USER_ID"',
    "user_query": "展示昨天订单GMV",
    "intent": "METRIC_QUERY",
    "ast_json": {"select":["gmv"],"where":{"dt":"yesterday"}},
    "main_metric_ids": [1234],
    "created_metric_ids": []
  }'

# Get a single chat turn
curl "/api/v1/chat/turns/{turn_id}"

# List turns in a session
curl "/api/v1/chat/sessions/{session_id}/turns"

# Write retrieval results
curl -X POST "/api/v1/chat/turns/{turn_id}/retrievals" \
  -H "Content-Type: application/json" \
  -d '{
    "retrievals": [
      {"item_type":"METRIC","item_id":"metric_foo","used_in_sql":true,"rank_no":1},
      {"item_type":"SNIPPET","item_id":"snpt_bar","similarity_score":0.77,"rank_no":2}
    ]
  }'

# List a turn's retrieval results
curl "/api/v1/chat/turns/{turn_id}/retrievals"
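The create-session and create-turn calls above, chained from Python. httpx and the response field `id` are assumptions about this API's response shape:

```python
# Sketch: create a session, then a turn inside it, mirroring the curl calls.
import httpx

BASE_URL = "http://127.0.0.1:8000"  # placeholder host

with httpx.Client(base_url=BASE_URL) as client:
    session = client.post("/api/v1/chat/sessions", json={"user_id": 1}).json()
    session_id = session["id"]  # assumed response field name
    turn = client.post(
        f"/api/v1/chat/sessions/{session_id}/turns",
        json={
            "user_id": 1,
            "user_query": "展示昨天订单GMV",
            "intent": "METRIC_QUERY",
            "ast_json": {"select": ["gmv"], "where": {"dt": "yesterday"}},
            "main_metric_ids": [1234],
            "created_metric_ids": [],
        },
    ).json()
    print(turn)
```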
69
doc/指标api.md
@ -1,69 +0,0 @@
# Create a metric
curl -X POST "/api/v1/metrics" \
  -H "Content-Type: application/json" \
  -d '{
    "metric_code": "metric_1234",
    "metric_name": "订单数",
    "biz_domain": "order",
    "biz_desc": "订单总数",
    "base_sql": "select count(*) as order_cnt from orders",
    "time_grain": "DAY",
    "dim_binding": ["dt"],
    "update_strategy": "FULL",
    "metric_aliases": ["订单量"],
    "created_by": '"$METRIC_USER_ID"'
  }'

# Update a metric
curl -X POST "/api/v1/metrics/{metric_id}" \
  -H "Content-Type: application/json" \
  -d '{"metric_name":"订单数-更新","is_active":false}'

# Get a metric
curl "/api/v1/metrics/{metric_id}"

# Create a schedule
curl -X POST "/api/v1/metric-schedules" \
  -H "Content-Type: application/json" \
  -d '{"metric_id":{metric_id},"cron_expr":"0 2 * * *","priority":5,"enabled":true}'

# Update a schedule
curl -X POST "/api/v1/metric-schedules/{schedule_id}" \
  -H "Content-Type: application/json" \
  -d '{"enabled":false,"retry_times":1}'

# List a metric's schedules
curl "/api/v1/metrics/{metric_id}/schedules"

# Trigger a run
curl -X POST "/api/v1/metric-runs/trigger" \
  -H "Content-Type: application/json" \
  -d '{
    "metric_id": {metric_id},
    "triggered_by": "API",
    "data_time_from": "2024-05-01T00:00:00Z",
    "data_time_to": "2024-05-02T00:00:00Z"
  }'

# List runs
curl "/api/v1/metric-runs?metric_id={metric_id}"

# Get a single run
curl "/api/v1/metric-runs/{run_id}"

# Write metric results
curl -X POST "/api/v1/metric-results/{metric_id}" \
  -H "Content-Type: application/json" \
  -d '{
    "metric_id": {metric_id},
    "results": [
      {"stat_time":"2024-05-01T00:00:00Z","metric_value":123.45,"data_version":"{run_id}"},
      {"stat_time":"2024-05-02T00:00:00Z","metric_value":234.56,"data_version":"{run_id}"}
    ]
  }'

# Query metric results
curl "/api/v1/metric-results?metric_id={metric_id}"

# Query the latest result
curl "/api/v1/metric-results/latest?metric_id={metric_id}"
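The trigger-and-check pattern above, sketched as a small Python poller. The response fields `id` and `status` are assumptions; the status values follow the RUNNING/SUCCESS/FAILED/SKIPPED set used in the schema docs below:

```python
# Sketch: trigger a metric run and poll until it reaches a terminal status.
import time

import httpx

BASE_URL = "http://127.0.0.1:8000"  # placeholder host

with httpx.Client(base_url=BASE_URL) as client:
    run = client.post("/api/v1/metric-runs/trigger", json={
        "metric_id": 1,  # placeholder metric id
        "triggered_by": "API",
        "data_time_from": "2024-05-01T00:00:00Z",
        "data_time_to": "2024-05-02T00:00:00Z",
    }).json()
    run_id = run["id"]  # assumed response field name
    while True:
        status = client.get(f"/api/v1/metric-runs/{run_id}").json().get("status")
        if status in ("SUCCESS", "FAILED", "SKIPPED"):
            break
        time.sleep(2)
    print(run_id, status)
```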
83
doc/指标生成.md
@ -1,83 +0,0 @@
A single user question → parsed into one chat_turn → which metrics/knowledge/sessions that turn used (chat_turn_retrieval) →
whether it produced new metrics (metric_def) →
whether it triggered a scheduled metric run (metric_job_run.turn_id) →
and finally which metric results were produced (metric_result.metric_id + stat_time).

Session domain
schema
Session table chat_session

Session turn table chat_turn

Turn-retrieval link table chat_turn_retrieval


API
1. Create a session
POST /api/v1/chat/sessions
2. Update session status
POST /api/v1/chat/sessions/{session_id}/update
3. Close a session
POST /api/v1/chat/sessions/{session_id}/close
4. Get a session
GET /api/v1/chat/sessions/{session_id}
5. List sessions (by user, time)
GET /api/v1/chat/sessions
6. Create a Q&A turn (user issues a query)
POST /api/v1/chat/sessions/{session_id}/turns
7. List all turns of a session
GET /api/v1/chat/sessions/{session_id}/turns
8. Get details of a single turn
GET /api/v1/chat/turns/{turn_id}
9. Bulk-write a turn's retrieval results
POST /api/v1/chat/turns/{turn_id}/retrievals
10. Query a turn's retrieval records
GET /api/v1/chat/turns/{turn_id}/retrievals
11. Update a turn's retrieval records (in future)
POST /api/v1/chat/turns/{turn_id}/retrievals/update

Metadata domain
schema
Metric definition table metric_def


API
12. Create a metric (from Q&A or a conventional definition)
POST /api/v1/metrics
13. Update a metric
POST /api/v1/metrics/{id}
14. Get metric details
GET /api/v1/metrics/{metric_id}

Execution & scheduling domain (tentatively Airflow)
schema
Metric schedule config table metric_schedule

Schedule run record table metric_job_run

API
1. Create a schedule config
POST /api/v1/metric-schedules
2. Update a schedule config
POST /api/v1/metric-schedules/{id}
3. Get a metric's schedule config details
GET /api/v1/metrics/{metric_id}/schedules
4. Manually trigger one metric run (e.g. from a chat query)
POST /api/v1/metric-runs/trigger
5. List run records
GET /api/v1/metric-runs
6. Get details of a single run
GET /api/v1/metric-runs/{run_id}

Data domain
schema
Metric result table (long format) metric_result


API
1. Query metric results (by time range & dimensions)
GET /api/v1/metric-results
2. Point query (latest value)
GET /api/v1/metric-results/latest
3. Bulk-write metric results
POST /api/v1/metric-results/{metric_id}
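The traceability chain at the top of this doc can be walked with a single join over the tables defined in the SQL files below. A sketch of that query, wrapped for execution with SQLAlchemy; the DSN is a placeholder, and joining `metric_result.data_version` to the run id follows that column's comment ("version or job_run id") as an assumption:

```python
# Sketch: one turn's full lineage — retrievals, triggered runs, and results.
from sqlalchemy import create_engine, text

LINEAGE_SQL = text("""
SELECT t.id            AS turn_id,
       r.item_type, r.item_id,
       jr.id           AS run_id, jr.status,
       mr.stat_time, mr.metric_value
FROM chat_turn t
LEFT JOIN chat_turn_retrieval r ON r.turn_id = t.id
LEFT JOIN metric_job_run jr     ON jr.source_turn_id = t.id
LEFT JOIN metric_result mr      ON mr.metric_id = jr.metric_id
                               AND mr.data_version = jr.id  -- assumed mapping
WHERE t.id = :turn_id
""")

engine = create_engine("mysql+pymysql://user:pass@localhost:3306/chatbi")  # placeholder
with engine.connect() as conn:
    for row in conn.execute(LINEAGE_SQL, {"turn_id": 1}):
        print(dict(row._mapping))
```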
@ -1,13 +0,0 @@
services:
  app:
    build: .
    ports:
      - "8060:8000"
    volumes:
      - .:/app
    environment:
      - PYTHONUNBUFFERED=1
    # Dev mode: run with --reload
    command: uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
    # Prod mode: comment out the command above and uncomment the line below
    # command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers 4
File diff suppressed because it is too large
@ -1,21 +0,0 @@
CREATE TABLE `ecommerce_orders` (
  `order_id` char(36) COLLATE utf8mb4_unicode_ci NOT NULL COMMENT 'UUID from CSV',
  `customer_id` int NOT NULL,
  `product_id` int NOT NULL,
  `category` varchar(64) COLLATE utf8mb4_unicode_ci NOT NULL,
  `price` decimal(10,2) NOT NULL,
  `quantity` int NOT NULL,
  `order_date` datetime(6) NOT NULL,
  `shipping_date` datetime(6) NOT NULL,
  `delivery_status` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
  `payment_method` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
  `device_type` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
  `channel` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
  `shipping_address` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
  `billing_address` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
  `customer_segment` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
  PRIMARY KEY (`order_id`),
  KEY `idx_customer` (`customer_id`),
  KEY `idx_product` (`product_id`),
  KEY `idx_order_date` (`order_date`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
@ -1,40 +0,0 @@
CREATE TABLE `action_results` (
  `id` bigint NOT NULL AUTO_INCREMENT COMMENT 'Primary key',
  `table_id` bigint NOT NULL COMMENT 'Table ID',
  `version_ts` bigint NOT NULL COMMENT 'Version timestamp (version number)',
  `action_type` enum('ge_profiling','ge_result_desc','snippet','snippet_alias') COLLATE utf8mb4_bin NOT NULL COMMENT 'Action type',
  `status` enum('pending','running','success','failed','partial') COLLATE utf8mb4_bin NOT NULL DEFAULT 'pending' COMMENT 'Execution status',
  `llm_usage` json DEFAULT NULL COMMENT 'LLM token usage stats',
  `error_code` varchar(128) COLLATE utf8mb4_bin DEFAULT NULL,
  `error_message` text COLLATE utf8mb4_bin,
  `started_at` datetime DEFAULT NULL,
  `finished_at` datetime DEFAULT NULL,
  `duration_ms` int DEFAULT NULL,
  `table_schema_version_id` varchar(19) COLLATE utf8mb4_bin NOT NULL,
  `table_schema` json NOT NULL,
  `ge_profiling_json` json DEFAULT NULL COMMENT 'Full profiling result JSON',
  `ge_profiling_json_size_bytes` bigint DEFAULT NULL,
  `ge_profiling_summary` json DEFAULT NULL COMMENT 'Profiling summary (large value_set etc. stripped)',
  `ge_profiling_summary_size_bytes` bigint DEFAULT NULL,
  `ge_profiling_total_size_bytes` bigint DEFAULT NULL COMMENT 'Sum of the two sizes above',
  `ge_profiling_html_report_url` varchar(1024) COLLATE utf8mb4_bin DEFAULT NULL COMMENT 'GE report HTML path/URL',
  `ge_result_desc_json` json DEFAULT NULL COMMENT 'Table description result JSON',
  `ge_result_desc_json_size_bytes` bigint DEFAULT NULL,
  `snippet_json` json DEFAULT NULL COMMENT 'SQL knowledge snippet result JSON',
  `snippet_json_size_bytes` bigint DEFAULT NULL,
  `snippet_alias_json` json DEFAULT NULL COMMENT 'SQL snippet rewrite/enrichment result JSON',
  `snippet_alias_json_size_bytes` bigint DEFAULT NULL,
  `callback_url` varchar(1024) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
  `result_checksum` varbinary(32) DEFAULT NULL COMMENT 'MD5/xxhash computed over the current action payload',
  `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `model` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT 'Model name',
  `model_provider` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT 'Model provider/channel',
  `model_params` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT 'Model params, e.g. temperature',
  PRIMARY KEY (`id`),
  UNIQUE KEY `uq_table_ver_action` (`table_id`,`version_ts`,`action_type`),
  KEY `idx_status` (`status`),
  KEY `idx_table` (`table_id`,`updated_at`),
  KEY `idx_action_time` (`action_type`,`version_ts`),
  KEY `idx_schema_version` (`table_schema_version_id`)
) ENGINE=InnoDB AUTO_INCREMENT=113 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin ROW_FORMAT=DYNAMIC COMMENT='Data analysis knowledge snippet table';
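One way to fill `result_checksum`, matching that column's comment (MD5 over the action's payload). Canonical JSON serialization is an assumption; the real service may hash differently (e.g. xxhash):

```python
# Sketch: compute a 16-byte MD5 checksum over an action's JSON payload.
import hashlib
import json


def result_checksum(payload: dict) -> bytes:
    canonical = json.dumps(payload, sort_keys=True, ensure_ascii=False,
                           separators=(",", ":")).encode("utf-8")
    return hashlib.md5(canonical).digest()   # 16 bytes, fits VARBINARY(32)


print(result_checksum({"snippet_json": {"id": "snpt_demo"}}).hex())
```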
@ -1,103 +0,0 @@
CREATE TABLE IF NOT EXISTS chat_session (
  id BIGINT AUTO_INCREMENT PRIMARY KEY,
  user_id BIGINT NOT NULL,
  session_uuid CHAR(36) NOT NULL,             -- externally visible ID (UUID)
  end_time DATETIME NULL,
  status VARCHAR(16) NOT NULL DEFAULT 'OPEN', -- OPEN/CLOSED/ABANDONED
  last_turn_id BIGINT NULL,                   -- points to chat_turn.id
  ext_context JSON NULL,                      -- business context
  created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  UNIQUE KEY uk_session_uuid (session_uuid),
  KEY idx_user_time (user_id, created_at),
  KEY idx_status_time (status, created_at),
  KEY idx_last_turn (last_turn_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;


CREATE TABLE IF NOT EXISTS chat_turn (
  id BIGINT AUTO_INCREMENT,
  session_id BIGINT NOT NULL,                 -- references chat_session.id
  turn_no INT NOT NULL,                       -- turn number within the session (1,2,3...)
  user_id BIGINT NOT NULL,

  user_query TEXT NOT NULL,                   -- raw user question
  intent VARCHAR(64) NULL,                    -- METRIC_QUERY/METRIC_EXPLAIN etc.
  ast_json JSON NULL,                         -- parsed AST

  generated_sql MEDIUMTEXT NULL,              -- final generated SQL
  sql_status VARCHAR(32) NULL,                -- SUCCESS/FAILED/SKIPPED
  error_msg TEXT NULL,                        -- SQL generation/execution error message

  main_metric_ids JSON NULL,                  -- metric IDs involved in this turn
  created_metric_ids JSON NULL,               -- metric IDs created in this turn

  end_time DATETIME NULL,

  created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  -- composite primary key; must include the partition key created_at
  PRIMARY KEY (id, created_at),
  KEY idx_session_turn (session_id, turn_no),
  KEY idx_session_time (session_id, created_at),
  KEY idx_intent_time (intent, created_at),
  KEY idx_user_time (user_id, created_at)
)
ENGINE=InnoDB
DEFAULT CHARSET=utf8mb4
PARTITION BY RANGE COLUMNS(created_at) (
  -- historical partitions (adjust to actual needs)
  PARTITION p202511 VALUES LESS THAN ('2025-12-01'),
  PARTITION p202512 VALUES LESS THAN ('2026-01-01'),
  -- monthly partitions for 2026
  PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
  PARTITION p202602 VALUES LESS THAN ('2026-03-01'),
  PARTITION p202603 VALUES LESS THAN ('2026-04-01'),
  PARTITION p202604 VALUES LESS THAN ('2026-05-01'),
  PARTITION p202605 VALUES LESS THAN ('2026-06-01'),
  PARTITION p202606 VALUES LESS THAN ('2026-07-01'),
  -- ... more months can be pre-created ...

  -- catch-all partition for future data, prevents insert failures
  PARTITION p_future VALUES LESS THAN (MAXVALUE)
);


CREATE TABLE IF NOT EXISTS chat_turn_retrieval (
  id BIGINT AUTO_INCREMENT,
  turn_id BIGINT NOT NULL,                    -- references chat_turn.id

  item_type VARCHAR(32) NOT NULL,             -- METRIC/SNIPPET/CHAT
  item_id VARCHAR(128) NOT NULL,              -- metric_id/snippet_id/table_name etc.
  item_extra JSON NULL,                       -- extra info, e.g. column names

  similarity_score DECIMAL(10,6) NULL,        -- similarity
  rank_no INT NULL,                           -- retrieval rank
  used_in_reasoning TINYINT(1) NOT NULL DEFAULT 0, -- whether it took part in reasoning
  used_in_sql TINYINT(1) NOT NULL DEFAULT 0,  -- whether it influenced the final SQL

  created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  -- composite primary key; must include the partition key created_at
  PRIMARY KEY (id, created_at),
  KEY idx_turn (turn_id),
  KEY idx_turn_type (turn_id, item_type),
  KEY idx_item (item_type, item_id)
)
ENGINE=InnoDB
DEFAULT CHARSET=utf8mb4
PARTITION BY RANGE COLUMNS(created_at) (
  -- historical partitions (adjust to actual needs)
  PARTITION p202511 VALUES LESS THAN ('2025-12-01'),
  PARTITION p202512 VALUES LESS THAN ('2026-01-01'),
  -- monthly partitions for 2026
  PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
  PARTITION p202602 VALUES LESS THAN ('2026-03-01'),
  PARTITION p202603 VALUES LESS THAN ('2026-04-01'),
  PARTITION p202604 VALUES LESS THAN ('2026-05-01'),
  PARTITION p202605 VALUES LESS THAN ('2026-06-01'),
  PARTITION p202606 VALUES LESS THAN ('2026-07-01'),
  -- ... more months can be pre-created ...

  -- catch-all partition for future data, prevents insert failures
  PARTITION p_future VALUES LESS THAN (MAXVALUE)
);
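These tables pre-create monthly `RANGE COLUMNS(created_at)` partitions and a `p_future` catch-all. A small sketch for generating the partition clauses for new months ahead of time (the naming and bound scheme follow the DDL above):

```python
# Sketch: emit monthly partition clauses matching the pYYYYMM convention.
from datetime import date


def month_partitions(start: date, months: int) -> list[str]:
    parts = []
    y, m = start.year, start.month
    for _ in range(months):
        # The upper bound is the first day of the *next* month.
        ny, nm = (y + 1, 1) if m == 12 else (y, m + 1)
        parts.append(f"PARTITION p{y}{m:02d} VALUES LESS THAN ('{ny}-{nm:02d}-01')")
        y, m = ny, nm
    parts.append("PARTITION p_future VALUES LESS THAN (MAXVALUE)")
    return parts


print(",\n".join(month_partitions(date(2025, 11, 1), 8)))
# Reproduces p202511 .. p202606 plus the catch-all, as in the DDL above.
```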
@ -1,155 +0,0 @@
|
||||
CREATE TABLE metric_def (
|
||||
id BIGINT AUTO_INCREMENT PRIMARY KEY,
|
||||
|
||||
metric_code VARCHAR(64) NOT NULL, -- 内部编码:order_cnt_delivery
|
||||
metric_name VARCHAR(128) NOT NULL, -- 中文名:外送订单数
|
||||
metric_aliases JSON NULL, -- 别名列表
|
||||
|
||||
biz_domain VARCHAR(64) NOT NULL, -- 通过table tag获取,支持人工配置
|
||||
biz_desc TEXT NULL, -- 业务口径描述
|
||||
|
||||
chat_turn_id BIGINT NULL, -- 来自哪轮会话
|
||||
|
||||
tech_desc TEXT NULL, -- 技术口径描述
|
||||
formula_expr TEXT NULL, -- 公式描述:"sum(pay_amount)"
|
||||
base_sql MEDIUMTEXT NOT NULL, -- 标准计算SQL(逻辑SQL/snippet)
|
||||
|
||||
time_grain VARCHAR(32) NOT NULL, -- DAY/HOUR/WEEK/MONTH
|
||||
dim_binding JSON NOT NULL, -- 维度绑定,如 ["dt","store_id","channel"]
|
||||
|
||||
update_strategy VARCHAR(32) NOT NULL, -- FULL/INCR/REALTIME
|
||||
schedule_id BIGINT NULL, -- 调度ID
|
||||
schedule_type INT NULL, -- 调度类型,默认调度cron
|
||||
|
||||
version INT NOT NULL DEFAULT 1,
|
||||
is_active TINYINT(1) NOT NULL DEFAULT 1,
|
||||
|
||||
sql_hash VARCHAR(64) NULL, -- base_sql hash 用于版本比较
|
||||
created_by BIGINT NULL,
|
||||
updated_by BIGINT NULL,
|
||||
|
||||
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
|
||||
UNIQUE KEY uk_metric_code (metric_code),
|
||||
KEY idx_domain_active (biz_domain, is_active),
|
||||
KEY idx_update_strategy (update_strategy),
|
||||
KEY idx_name (metric_name)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
|
||||

CREATE TABLE metric_schedule (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    metric_id BIGINT NOT NULL,                      -- references metric_def.id

    cron_expr VARCHAR(64) NOT NULL,                 -- schedule expression
    enabled TINYINT(1) NOT NULL DEFAULT 1,          -- whether enabled
    priority INT NOT NULL DEFAULT 10,               -- priority

    backfill_allowed TINYINT(1) NOT NULL DEFAULT 1, -- whether backfill is allowed
    max_runtime_sec INT NULL,                       -- max runtime (seconds)
    retry_times INT NOT NULL DEFAULT 0,             -- retries on failure

    owner_team VARCHAR(64) NULL,
    owner_user_id BIGINT NULL,

    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,

    KEY idx_metric_enabled (metric_id, enabled),
    KEY idx_owner (owner_team, owner_user_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
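For illustration only, cron_expr can be evaluated into a concrete next fire time with the croniter package; this is an assumption, croniter is not among this project's dependencies:

from datetime import datetime

from croniter import croniter  # assumption: install separately, not in requirements.txt

# "0 2 * * *" is the expression used by the tests later in this diff: daily at 02:00.
next_run = croniter("0 2 * * *", datetime.utcnow()).get_next(datetime)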

CREATE TABLE metric_job_run (
    id BIGINT AUTO_INCREMENT,

    metric_id BIGINT NOT NULL,              -- metric_def.id
    schedule_id BIGINT NULL,                -- metric_schedule.id, NULL for manual triggers
    source_turn_id BIGINT NULL,             -- qa_turn.id when the run was triggered by a chat turn

    data_time_from DATETIME NULL,           -- start of the metric's time window
    data_time_to DATETIME NULL,             -- end of the metric's time window

    metric_version INT NOT NULL,            -- metric version used for this run
    base_sql_snapshot MEDIUMTEXT NOT NULL,  -- SQL snapshot used for this run

    status VARCHAR(32) NOT NULL,            -- RUNNING/SUCCESS/FAILED/SKIPPED
    error_msg TEXT NULL,

    affected_rows BIGINT NULL,              -- rows written
    runtime_ms BIGINT NULL,                 -- execution time

    triggered_by VARCHAR(32) NOT NULL,      -- SCHEDULER/MANUAL/API/QA_TURN
    triggered_at DATETIME NOT NULL,
    started_at DATETIME NULL,
    finished_at DATETIME NULL,

    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    -- composite primary key; must include created_at for partitioning
    PRIMARY KEY (id, created_at),
    KEY idx_metric_time (metric_id, data_time_from, data_time_to),
    KEY idx_status_time (status, triggered_at),
    KEY idx_schedule (schedule_id),
    KEY idx_source_turn (source_turn_id)
)
ENGINE=InnoDB
DEFAULT CHARSET=utf8mb4
PARTITION BY RANGE COLUMNS(created_at) (
    -- historical partitions (adjust to actual needs)
    PARTITION p202511 VALUES LESS THAN ('2025-12-01'),
    PARTITION p202512 VALUES LESS THAN ('2026-01-01'),
    -- 2026 monthly partitions
    PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
    PARTITION p202602 VALUES LESS THAN ('2026-03-01'),
    PARTITION p202603 VALUES LESS THAN ('2026-04-01'),
    PARTITION p202604 VALUES LESS THAN ('2026-05-01'),
    PARTITION p202605 VALUES LESS THAN ('2026-06-01'),
    PARTITION p202606 VALUES LESS THAN ('2026-07-01'),
    -- ... a few more months can be pre-created ...

    -- catch-all partition for future data, prevents insert failures
    PARTITION p_future VALUES LESS THAN (MAXVALUE)
);

CREATE TABLE metric_result (
    id BIGINT AUTO_INCREMENT,

    metric_id BIGINT NOT NULL,            -- metric_def.id
    metric_version INT NOT NULL,          -- metric_def.version
    stat_time DATETIME NOT NULL,          -- time aligned to time_grain

    extra_dims JSON NULL,                 -- other dimensions, stored as JSON

    metric_value DECIMAL(32,8) NOT NULL,  -- metric result value

    load_time DATETIME NOT NULL,          -- load time
    data_version BIGINT NULL,             -- version or job_run id

    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    -- composite primary key; must include created_at for partitioning
    PRIMARY KEY (id, created_at),
    KEY idx_metric_time (metric_id, stat_time),
    KEY idx_load_time (load_time)
)
ENGINE=InnoDB
DEFAULT CHARSET=utf8mb4
PARTITION BY RANGE COLUMNS(created_at) (
    -- historical partitions (adjust to actual needs)
    PARTITION p202511 VALUES LESS THAN ('2025-12-01'),
    PARTITION p202512 VALUES LESS THAN ('2026-01-01'),
    -- 2026 monthly partitions
    PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
    PARTITION p202602 VALUES LESS THAN ('2026-03-01'),
    PARTITION p202603 VALUES LESS THAN ('2026-04-01'),
    PARTITION p202604 VALUES LESS THAN ('2026-05-01'),
    PARTITION p202605 VALUES LESS THAN ('2026-06-01'),
    PARTITION p202606 VALUES LESS THAN ('2026-07-01'),
    -- ... a few more months can be pre-created ...

    -- catch-all partition for future data, prevents insert failures
    PARTITION p_future VALUES LESS THAN (MAXVALUE)
);
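Reading the newest data point for one metric out of this layout is a plain ORDER BY over stat_time, which idx_metric_time covers. A hedged sketch (table and column names from the DDL above; the query itself is illustrative, the service's own version lives behind /api/v1/metric-results/latest):

from sqlalchemy import text

from app.db import get_engine

def latest_metric_value(metric_id: int):
    # Most recent stat_time wins; the composite index (metric_id, stat_time)
    # lets MySQL satisfy this without a sort.
    sql = text(
        "SELECT stat_time, metric_value FROM metric_result "
        "WHERE metric_id = :mid ORDER BY stat_time DESC LIMIT 1"
    )
    with get_engine().connect() as conn:
        return conn.execute(sql, {"mid": metric_id}).first()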
@ -1,24 +0,0 @@
CREATE TABLE `rag_snippet` (
    `rag_item_id` bigint NOT NULL COMMENT 'RAG item id (stable hash of table/version/snippet_id)',
    `workspace_id` bigint NOT NULL COMMENT 'RAG workspace scope',
    `table_id` bigint NOT NULL COMMENT 'source table ID',
    `version_ts` bigint NOT NULL COMMENT 'table version number',
    `action_result_id` bigint NOT NULL COMMENT 'source action_results primary key (snippet_alias or snippet row)',
    `snippet_id` varchar(255) COLLATE utf8mb4_bin NOT NULL COMMENT 'original snippet id',
    `rag_text` text COLLATE utf8mb4_bin NOT NULL COMMENT 'concatenated text used for embedding',
    `merged_json` json NOT NULL COMMENT 'merged snippet object',
    `created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'write time, used for partitioning',
    `updated_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`rag_item_id`,`created_at`),
    KEY `idx_action_result` (`action_result_id`),
    KEY `idx_workspace` (`workspace_id`),
    KEY `idx_table_version` (`table_id`,`version_ts`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin
PARTITION BY RANGE COLUMNS (`created_at`) (
    PARTITION p202401 VALUES LESS THAN ('2024-02-01'),
    PARTITION p202402 VALUES LESS THAN ('2024-03-01'),
    PARTITION p202403 VALUES LESS THAN ('2024-04-01'),
    PARTITION p202404 VALUES LESS THAN ('2024-05-01'),
    PARTITION p202405 VALUES LESS THAN ('2024-06-01'),
    PARTITION p_future VALUES LESS THAN (MAXVALUE)
) COMMENT='RAG snippet index cache';
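The rag_item_id comment calls for a stable hash of table/version/snippet_id; the actual derivation lives in app/services/table_snippet.py and is not shown in this diff. One way such an id could be built, as an assumption rather than the repo's implementation:

import hashlib

def rag_item_id(table_id: int, version_ts: int, snippet_id: str) -> int:
    # Stable across processes and restarts: same inputs always yield the same
    # signed 64-bit value, which fits the bigint primary key.
    digest = hashlib.sha256(f"{table_id}:{version_ts}:{snippet_id}".encode("utf-8")).digest()
    return int.from_bytes(digest[:8], "big", signed=True)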
@ -1,40 +0,0 @@
CREATE TABLE `action_results` (
    `id` bigint NOT NULL AUTO_INCREMENT COMMENT 'primary key',
    `table_id` bigint NOT NULL COMMENT 'table ID',
    `version_ts` bigint NOT NULL COMMENT 'version timestamp (version number)',
    `action_type` enum('ge_profiling','ge_result_desc','snippet','snippet_alias') COLLATE utf8mb4_bin NOT NULL COMMENT 'action type',
    `status` enum('pending','running','success','failed','partial') COLLATE utf8mb4_bin NOT NULL DEFAULT 'pending' COMMENT 'execution status',
    `model` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT 'model name',
    `model_provider` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT 'model provider/channel',
    `model_params` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL COMMENT 'model params, e.g. temperature',
    `llm_usage` json DEFAULT NULL COMMENT 'LLM token usage stats',
    `error_code` varchar(128) COLLATE utf8mb4_bin DEFAULT NULL,
    `error_message` text COLLATE utf8mb4_bin,
    `started_at` datetime DEFAULT NULL,
    `finished_at` datetime DEFAULT NULL,
    `duration_ms` int DEFAULT NULL,
    `table_schema_version_id` varchar(19) COLLATE utf8mb4_bin NOT NULL,
    `table_schema` json NOT NULL,
    `ge_profiling_json` json DEFAULT NULL COMMENT 'full profiling result JSON',
    `ge_profiling_json_size_bytes` bigint DEFAULT NULL,
    `ge_profiling_summary` json DEFAULT NULL COMMENT 'profiling summary (large value_set etc. removed)',
    `ge_profiling_summary_size_bytes` bigint DEFAULT NULL,
    `ge_profiling_total_size_bytes` bigint DEFAULT NULL COMMENT 'sum of the two above',
    `ge_profiling_html_report_url` varchar(1024) COLLATE utf8mb4_bin DEFAULT NULL COMMENT 'GE HTML report path/URL',
    `ge_result_desc_json` json DEFAULT NULL COMMENT 'table description result JSON',
    `ge_result_desc_json_size_bytes` bigint DEFAULT NULL,
    `snippet_json` json DEFAULT NULL COMMENT 'SQL knowledge snippet result JSON',
    `snippet_json_size_bytes` bigint DEFAULT NULL,
    `snippet_alias_json` json DEFAULT NULL COMMENT 'SQL snippet rewrite/enrichment result JSON',
    `snippet_alias_json_size_bytes` bigint DEFAULT NULL,
    `callback_url` varchar(1024) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL,
    `result_checksum` varbinary(32) DEFAULT NULL COMMENT 'MD5/xxhash computed over the current action payload',
    `created_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
    `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`id`),
    UNIQUE KEY `uq_table_ver_action` (`table_id`,`version_ts`,`action_type`),
    KEY `idx_status` (`status`),
    KEY `idx_table` (`table_id`,`updated_at`),
    KEY `idx_action_time` (`action_type`,`version_ts`),
    KEY `idx_schema_version` (`table_schema_version_id`)
) ENGINE=InnoDB AUTO_INCREMENT=53 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin ROW_FORMAT=DYNAMIC COMMENT='data-analysis knowledge snippet table';
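result_checksum is described as an MD5/xxhash over the action's payload. A minimal sketch of the MD5 variant, assuming keys and separators are canonicalized before hashing (the 16-byte digest fits VARBINARY(32); the real computation is not shown in this diff):

import hashlib
import json

def result_checksum(payload: dict) -> bytes:
    # Sorted keys and fixed separators make the serialization deterministic,
    # so identical payloads always produce the same checksum.
    canonical = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    return hashlib.md5(canonical.encode("utf-8")).digest()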
2
ge_v1.py
2
ge_v1.py
@ -121,7 +121,7 @@ def clean_value(value: Any) -> Any:
    if isinstance(value, (np.generic,)):
        return value.item()
    if isinstance(value, pd.Timestamp):
        return str(value)
        return value.isoformat()
    if pd.isna(value):
        return None
    return value
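The one-line change above swaps str() for Timestamp.isoformat(), so serialized timestamps use the ISO 8601 'T' separator instead of pandas' default space. For example:

import pandas as pd

ts = pd.Timestamp("2026-01-02 03:04:05")
str(ts)         # '2026-01-02 03:04:05'  (space separator)
ts.isoformat()  # '2026-01-02T03:04:05'  (ISO 8601 'T' separator)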
30
logging.yaml
30
logging.yaml
@ -1,30 +0,0 @@
version: 1
formatters:
  standard:
    format: "%(asctime)s %(levelname)s %(name)s:%(lineno)d %(message)s"
handlers:
  console:
    class: logging.StreamHandler
    level: INFO
    formatter: standard
    stream: ext://sys.stdout
  file:
    class: logging.handlers.RotatingFileHandler
    level: INFO
    formatter: standard
    filename: logs/app.log
    maxBytes: 10485760  # 10 MB
    backupCount: 5
    encoding: utf-8
loggers:
  app:
    level: INFO
    handlers:
      - console
      - file
    propagate: no
root:
  level: INFO
  handlers:
    - console
    - file
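Wiring this file into Python's logging is not shown in the diff; a minimal sketch using the standard dictConfig (the logs/ directory must exist first, since RotatingFileHandler opens logs/app.log eagerly):

import logging
import logging.config
from pathlib import Path

import yaml

Path("logs").mkdir(exist_ok=True)  # RotatingFileHandler needs logs/ to exist
with open("logging.yaml", encoding="utf-8") as fh:
    logging.config.dictConfig(yaml.safe_load(fh))
logging.getLogger("app").info("logging configured")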
6
main.py
6
main.py
@ -1,6 +0,0 @@
def main():
    print("Hello from data-ge-new!")


if __name__ == "__main__":
    main()
23
project.md
23
project.md
@ -1,23 +0,0 @@
Project structure and logic

app/main.py: creates the FastAPI app and lifespan, initializes the shared httpx.AsyncClient and LLMGateway, and, with unified exception handling, exposes four endpoints: chat proxy, import analysis, table profiling pipeline, and table snippet persistence.
app/models.py: defines all request/response models and enums (LLM requests, import analysis jobs, table profiling jobs, snippet persistence, etc.) with field validation and defaults.
app/services: core business logic
gateway.py forwards /v1/chat/completions requests to NEW_API_BASE_URL (with an optional Bearer token) and normalizes the responses.
import_analysis.py assembles the import prompt (prompt/data_import_analysis.md), parses/truncates samples, calls the unified chat endpoint, extracts the JSON result and token usage, and finally calls back the business side.
table_profiling.py runs a four-step serial pipeline: Great Expectations profiling → LLM result description (prompt/ge_result_desc_prompt.md) → snippet generation (prompt/snippet_generator.md) → snippet aliases (prompt/snippet_alias_generator.md), with a status/result callback after each step.
table_snippet.py upserts each step's results into the database table, automatically serializing JSON/size info and building INSERT ... ON DUPLICATE KEY UPDATE.
app/providers/*: direct clients for each vendor (OpenAI/Anthropic/OpenRouter/Gemini/Qwen/DeepSeek) implementing the unified chat interface; the main flow currently forwards through new-api, but direct access is retained.
prompt/ holds the prompt templates; scripts/ and test/ provide call examples and regression samples; table_snippet.sql gives the action_results schema (used to persist snippets and profiling results).
Features / requirements

LLM gateway: POST /v1/chat/completions accepts an LLMRequest (provider + model + messages, etc.) and passes the payload through to NEW_API_BASE_URL/v1/chat/completions with optional NEW_API_AUTH_TOKEN auth; on error it returns 4xx/5xx and logs the raw response.
Import analysis (async): POST /v1/import/analyze accepts import samples (rows/headers/raw_csv/table_schema), a target model llm_model (default DEFAULT_IMPORT_MODEL, optionally restricted by the IMPORT_SUPPORTED_MODELS whitelist), temperature, and a callback URL. The service converts the samples to CSV, attaches the schema, concatenates system + user messages, calls the unified chat endpoint, parses the JSON in the first choice as the analysis result, and returns it together with LLM usage via the callback; on failure it calls back with status=failed and the error message.
Table profiling pipeline (async): POST /v1/table/profiling accepts the table identifier, version, callback URL, and GE/LLM configuration (datasource/batch_request, connection-string template, LLM model and timeout). The pipeline runs in order:
Great Expectations profiling (profiler type, datasource, and runtime SQL query/table can be specified), producing full and summary JSON plus a Data Docs path;
call the chat endpoint to generate a GE result description JSON;
generate an array of SQL snippets from the description;
generate snippet aliases/keywords.
Each step calls back on success or failure; the payload includes action_type, the result JSON, model, llm_usage, error info, etc.
Snippet persistence: POST /v1/table/snippet accepts a TableSnippetUpsertRequest (table/version, action type, status, schema, model info, per-stage JSON and sizes, error code, timestamps, etc.), assembles it into the action_results table as an UPSERT, and returns whether an existing record was updated.
Configuration and runtime: core environment variables live in app/settings.py (API keys, DEFAULT_IMPORT_MODEL, IMPORT_GATEWAY_BASE_URL/NEW_API_BASE_URL, the model whitelist, database URL, etc.); logging uses logging.yaml and auto-creates logs/; HTTP client timeout/proxy are controlled by HTTP_CLIENT_TIMEOUT, HTTP_CLIENT_TRUST_ENV, HTTP_CLIENT_PROXY. For debugging, run uvicorn app.main:app --reload; Docker is provided via Dockerfile/docker-compose.yml.
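As a usage illustration of the async import endpoint described above — the exact request schema lives in app/models.py, so the field values here are assumptions:

import requests

payload = {
    "headers": ["order_id", "order_dt", "gmv"],       # sample column names (illustrative)
    "rows": [[1, "2026-01-01", 99.5]],                # sample rows to analyze
    "llm_model": "deepseek:deepseek-chat",            # falls back to DEFAULT_IMPORT_MODEL
    "callback_url": "http://localhost:9999/import-callback",
}
resp = requests.post("http://localhost:8000/v1/import/analyze", json=payload, timeout=30)
print(resp.status_code, resp.json())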
@ -1,47 +0,0 @@
System role (System)
You are a "table-profile extractor". The input is a Great Expectations profiling/validation result JSON,
which may contain column-level expectations (expect_*), statistics, sample values, and type inference; it may also carry table/batch metadata.
Normalize it into a machine-consumable "table profile" JSON, giving a confidence score and rationale for anything uncertain.
Do not invent columns, time ranges, or values that do not exist.

User message (User)
[Input: GE result JSON]
{{GE_RESULT_JSON}}

[Output requirements (output JSON only, no explanatory text)]
{
  "table": "<db.table or table name>",
  "row_count": <int|null>,                 // may be null if unknown
  "role": "fact|dimension|unknown",        // heuristic based on metric/dimension share and uniqueness
  "grain": ["<col1>", "<col2>", ...],      // fact-grain guess (e.g. dt/store/category)
  "time": { "column": "<name>|null", "granularity": "day|week|month|unknown", "range": ["YYYY-MM-DD","YYYY-MM-DD"]|null, "has_gaps": true|false|null },
  "columns": [
    {
      "name": "<col>",
      "dtype": "<GE-inferred/physical type>",
      "semantic_type": "dimension|metric|time|text|id|unknown",
      "null_rate": <0~1|null>,
      "distinct_count": <int|null>,
      "distinct_ratio": <0~1|null>,
      "stats": { "min": <number|string|null>,"max": <number|string|null>,"mean": <number|null>,"std": <number|null>,"skewness": <number|null> },
      "enumish": true|false|null,          // low entropy / enumerable
      "top_values": [{"value":"<v>","pct":<0~1>}, ...], // top K (≤10)
      "pk_candidate_score": <0~1>,         // combined uniqueness + non-null score
      "metric_candidate_score": <0~1>,     // numeric / skewed / business-term hit
      "comment": "<column comment or GE description | may be empty>"
    }
  ],
  "primary_key_candidates": [["colA","colB"], ...],  // from unique/compound-unique expectations
  "fk_candidates": [{"from":"<col>","to":"<dim_table(col)>","confidence":<0~1>}],
  "quality": {
    "failed_expectations": [{"name":"<expect_*>","column":"<col|table>","summary":"<one sentence>"}],
    "warning_hints": ["columns with null rate > 0.2: ...", "time column has gaps: ..."]
  },
  "confidence_notes": ["<why role/grain/time column were judged this way>"]
}

[Decision rules (brief)]
- time column: date/time type OR name matches dt/date/day, etc.; if min/max exist, report the range; if gaps ≥ 1 day, set has_gaps=true.
- semantic_type: numeric + right-skewed/high variance → more likely metric; highly unique / ID-like name → id; high cardinality + text → text; low entropy + bounded values → dimension.
- role: a high share of metric columns plus a time column → lean fact; mostly enums/IDs with few numerics → dimension.
- When confidence is low, emit null or unknown and record the reason in confidence_notes.
@ -1,52 +0,0 @@
System role (System)
You are a "SQL snippet alias generator".
The input is one or more SQL snippet objects (from snippet.json); the output is, per snippet, a set of diverse aliases (colloquial / neutral / professional), keywords, and intent tags.
Process every snippet object and output the same number of JSON elements.

User message (User)
[Context]

SQL snippet object array: {{SNIPPET_ARRAY}}  // one or more snippets from snippet.json

[Task requirements]
For each SQL snippet in the input array, output one JSON object with the following structure:

{
  "id": "<same as the input snippet id>",
  "aliases": [
    {"text": "…", "tone": "口语|中性|专业"},   // colloquial | neutral | professional
    {"text": "…", "tone": "专业"}
  ],
  "keywords": [
    "GMV","销售额","TopN","category","类目","趋势","同比","客户","订单","质量","异常检测","join","过滤","sample"
  ],
  "intent_tags": ["aggregate","trend","topn","ratio","quality","join","sample","filter","by_dimension"]
}

Generation rules
1. One output per input: each snippet in the input array maps to one output object (same id).

2. aliases
At least 3 aliases, covering the tones 口语 (colloquial) / 中性 (neutral) / 专业 (professional).
≤ 20 characters, semantically equivalent; do not add fields or business definitions that do not exist.
Examples:
GMV趋势分析 (neutral)
每天卖多少钱 (colloquial)
按日GMV曲线 (professional)

3. keywords
8–15 keywords covering the snippet's core dimensions, metrics, analysis type, and near-synonyms.
Mixed Chinese and English (e.g. "GMV"/"销售额", "同比"/"YoY", "类目"/"category").
Include the analysis-intent keywords used for matching (e.g. 趋势/trend, 排行/ranking, 占比/share, 质量检查/quality check, 过滤/filter).

4. intent_tags

Pick from the following set, consistent with the snippet's type and purpose:
["aggregate","trend","topn","ratio","quality","join","sample","filter","by_dimension"]

For condition snippets (WHERE clauses) add "filter"; when the snippet groups by a dimension, add "by_dimension".

5. Language and content requirements

Keep a formal written style; do not add explanations.

Output only the JSON array, with no prose or extra text.
@ -1,46 +0,0 @@
System role (System)
You are a "SQL snippet generator". Generate reusable analysis snippets based only on the given "table profile".
For each snippet produce: title, purpose description, snippet type, variables, applicability conditions, and a SQL template (MySQL dialect), and note the business definition and safety limits.
Do not invent columns absent from the profile. Time/dimension/metric columns must match the profile.

User message (User)
[Table profile JSON]
{{TABLE_PROFILE_JSON}}

[Output requirements (output a JSON array only)]
[
  {
    "id": "snpt_<slug>",
    "title": "Chinese title (≤16 chars)",
    "desc": "one-sentence purpose",
    "type": "aggregate|trend|topn|ratio|quality|join|sample",
    "applicability": {
      "required_columns": ["<col>", ...],
      "time_column": "<dt|nullable>",
      "constraints": {
        "dim_cardinality_hint": <int|null>,  // for TopN limits and performance hints
        "fk_join_available": true|false,
        "notes": ["suggest LIMIT<=50 for high-cardinality dimensions", "..."]
      }
    },
    "variables": [
      {"name":"start_date","type":"date"},
      {"name":"end_date","type":"date"},
      {"name":"top_n","type":"int","default":10}
    ],
    "dialect_sql": {
      "mysql": ""
    },
    "business_caliber": "clear business definition, e.g. UV deduplicated by device_id; grain = day-category",
    "examples": ["sample question 1","sample question 2"]
  }
]

[Snippet selection guidance]
- If a time column exists: generate trend_by_day / yoy_qoq / moving_avg.
- If an enumish dimension exists (distinct 5–200): generate topn_by_dimension / share_of_total.
- If metric columns exist: generate sum/avg/max, quantiles/anomaly detection (3σ/box plot).
- If a primary key/unique column exists: generate dedup / detail sampling / quality checks.
- If fk_candidates exist: generate both a "joined dimension-name version" and a "pure-ID version".
- For high-cardinality enum dimensions: stress the LIMIT suggestion and possible performance risk in constraints.notes.
- Besides full SQL snippets, also emit partial-SQL snippets, e.g. where payment_method = 'Credit Card' and delivery_status = 'Deliverd' means the payment method is credit card and the delivery status is delivered.
@ -1,21 +0,0 @@
[project]
name = "data-ge-new"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "fastapi>=0.111.0",
    "uvicorn[standard]>=0.29.0",
    "pydantic>=2.6.0",
    "sqlalchemy>=2.0.28",
    "pymysql>=1.1.0",
    "great-expectations[profilers]==0.18.19",
    "pandas>=2.0",
    "numpy>=1.24",
    "openpyxl>=3.1",
    "httpx==0.27.2",
    "python-dotenv==1.0.1",
    "requests>=2.31.0",
    "PyYAML>=6.0.1",
]
11
requirements.txt
Normal file
11
requirements.txt
Normal file
@ -0,0 +1,11 @@
fastapi>=0.111.0
uvicorn[standard]>=0.29.0
pydantic>=2.6.0
sqlalchemy>=2.0.28
pymysql>=1.1.0
great_expectations>=0.18.0,<0.19.0
pandas>=2.0
numpy>=1.24
openpyxl>=3.1
httpx==0.27.2
python-dotenv==1.0.1
@ -1,226 +0,0 @@
import argparse
import logging
import os
from typing import Dict, Iterable, List, Optional

import datasets
from datasets import DownloadConfig
from huggingface_hub import snapshot_download

# Batch-download datasets and models from Hugging Face.
# Proxy and download parameters (timeout, retries) are configurable via CLI flags;
# items are downloaded in a loop and stored under the root directory's
# dataset and model subdirectories.


def _parse_id_list(values: Iterable[str]) -> List[str]:
    """Collect IDs passed multiple times or comma-separated into one list."""
    ids: List[str] = []
    for value in values:
        value = value.strip()
        if not value:
            continue
        if "," in value:
            ids.extend(v.strip() for v in value.split(",") if v.strip())
        else:
            ids.append(value)
    return ids


def _parse_proxy_args(proxy_args: Iterable[str]) -> Dict[str, str]:
    """Parse proxy settings from the CLI, format scheme=url."""
    proxies: Dict[str, str] = {}
    for item in proxy_args:
        raw = item.strip()
        if not raw:
            continue
        if "=" not in raw:
            logging.warning("Proxy argument %s lacks an '=' separator; ignoring it", raw)
            continue
        key, value = raw.split("=", 1)
        key = key.strip()
        value = value.strip()
        if not key or not value:
            logging.warning("Failed to parse proxy argument %s; ignoring it", raw)
            continue
        proxies[key] = value
    return proxies


def _sanitize_dir_name(name: str) -> str:
    return name.replace("/", "__")


def _ensure_dirs(root_dir: str) -> Dict[str, str]:
    paths = {
        "dataset": os.path.join(root_dir, "dataset"),
        "model": os.path.join(root_dir, "model"),
    }
    for path in paths.values():
        os.makedirs(path, exist_ok=True)
    return paths


def _build_download_config(cache_dir: str, retries: Optional[int], proxies: Dict[str, str]) -> DownloadConfig:
    config_kwargs = {"cache_dir": cache_dir}
    if retries is not None:
        config_kwargs["max_retries"] = retries
    if proxies:
        config_kwargs["proxies"] = proxies
    return DownloadConfig(**config_kwargs)


def _apply_timeout(timeout: Optional[float]) -> None:
    if timeout is None:
        return
    str_timeout = str(timeout)
    os.environ.setdefault("HF_DATASETS_HTTP_TIMEOUT", str_timeout)
    os.environ.setdefault("HF_HUB_HTTP_TIMEOUT", str_timeout)


def _resolve_log_level(level_name: str) -> int:
    if isinstance(level_name, int):
        return level_name
    upper_name = str(level_name).upper()
    return getattr(logging, upper_name, logging.INFO)


def _build_argument_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        description="Batch-download Hugging Face datasets and models into a target directory."
    )
    parser.add_argument(
        "-d",
        "--dataset",
        action="append",
        default=[],
        help="Dataset ID to download; repeat the flag or pass a comma-separated list.",
    )
    parser.add_argument(
        "-m",
        "--model",
        action="append",
        default=[],
        help="Model ID to download; repeat the flag or pass a comma-separated list.",
    )
    parser.add_argument(
        "-r",
        "--root",
        default="file",
        help="Storage root directory, default: file.",
    )
    parser.add_argument(
        "--retries",
        type=int,
        default=None,
        help="Number of retries after a failure; default: no retries.",
    )
    parser.add_argument(
        "--timeout",
        type=float,
        default=None,
        help="HTTP timeout in seconds; defaults to the library setting.",
    )
    parser.add_argument(
        "-p",
        "--proxy",
        action="append",
        default=[],
        help="Proxy setting, format scheme=url, repeatable, e.g. --proxy http=http://127.0.0.1:7890",
    )
    parser.add_argument(
        "--log-level",
        default="INFO",
        help="Log level, default INFO.",
    )
    return parser


def download_datasets(dataset_ids: Iterable[str], root_dir: str, retries: Optional[int], proxies: Dict[str, str]) -> None:
    if not dataset_ids:
        return
    cache_dir = root_dir
    download_config = _build_download_config(cache_dir, retries, proxies)
    for dataset_id in dataset_ids:
        try:
            logging.info("Downloading dataset %s", dataset_id)
            # load_dataset triggers the cached download
            dataset = datasets.load_dataset(
                dataset_id,
                cache_dir=cache_dir,
                download_config=download_config,
                download_mode="reuse_cache_if_exists",
            )
            target_path = os.path.join(root_dir, _sanitize_dir_name(dataset_id))
            dataset.save_to_disk(target_path)
            logging.info("Dataset %s downloaded to %s", dataset_id, target_path)
        except Exception as exc:  # pylint: disable=broad-except
            logging.error("Failed to download dataset %s: %s", dataset_id, exc)


def download_models(
    model_ids: Iterable[str],
    target_dir: str,
    retries: Optional[int],
    proxies: Dict[str, str],
    timeout: Optional[float],
) -> None:
    if not model_ids:
        return
    max_attempts = (retries or 0) + 1
    hub_kwargs = {
        "local_dir_use_symlinks": False,
        "max_workers": os.cpu_count() or 4,
    }
    if proxies:
        hub_kwargs["proxies"] = proxies
    if timeout is not None:
        # snapshot_download takes etag_timeout rather than a generic timeout kwarg
        hub_kwargs["etag_timeout"] = timeout
    for model_id in model_ids:
        # Give each model its own subdirectory so downloads do not overwrite each other.
        local_dir = os.path.join(target_dir, _sanitize_dir_name(model_id))
        attempt = 0
        while attempt < max_attempts:
            attempt += 1
            try:
                logging.info("Downloading model %s (attempt %s/%s)", model_id, attempt, max_attempts)
                snapshot_download(
                    repo_id=model_id,
                    local_dir=local_dir,
                    **hub_kwargs,
                )
                logging.info("Model %s downloaded to %s", model_id, local_dir)
                break
            except Exception as exc:  # pylint: disable=broad-except
                logging.error("Failed to download model %s: %s", model_id, exc)
                if attempt >= max_attempts:
                    logging.error("Model %s still failed after retries", model_id)


def main() -> None:
    parser = _build_argument_parser()
    args = parser.parse_args()

    logging.basicConfig(
        level=_resolve_log_level(args.log_level),
        format="%(asctime)s - %(levelname)s - %(message)s",
    )

    dataset_ids = _parse_id_list(args.dataset)
    model_ids = _parse_id_list(args.model)
    retries = args.retries
    timeout = args.timeout
    proxies = _parse_proxy_args(args.proxy)
    _apply_timeout(timeout)

    if not dataset_ids and not model_ids:
        logging.warning(
            "No datasets or models configured; "
            "pass Hugging Face IDs via --dataset / --model"
        )
        return

    dirs = _ensure_dirs(args.root)

    download_datasets(dataset_ids, dirs["dataset"], retries, proxies)
    download_models(model_ids, dirs["model"], retries, proxies, timeout)


if __name__ == "__main__":
    main()
@ -1,80 +0,0 @@
from __future__ import annotations

import json
import os
import sys
from datetime import datetime
from typing import Any, Dict

import requests


def build_demo_payload() -> Dict[str, Any]:
    now = datetime.utcnow()
    started_at = now.replace(microsecond=0).isoformat() + "Z"
    finished_at = now.replace(microsecond=0).isoformat() + "Z"
    return {
        "table_id": 42,
        "version_ts": 20251101200000,
        "action_type": "snippet",
        "status": "success",
        "callback_url": "http://localhost:9999/dummy-callback",
        "table_schema_version_id": 7,
        "table_schema": {
            "columns": [
                {"name": "order_id", "type": "bigint"},
                {"name": "order_dt", "type": "date"},
                {"name": "gmv", "type": "decimal(18,2)"},
            ]
        },
        "result_json": [
            {
                "id": "snpt_daily_gmv",
                "title": "按日GMV",
                "desc": "统计每日GMV总额",
                "type": "trend",
                "dialect_sql": {
                    "mysql": "SELECT order_dt, SUM(gmv) AS total_gmv FROM orders GROUP BY order_dt ORDER BY order_dt"
                },
            }
        ],
        "result_summary_json": {"total_snippets": 1},
        "html_report_url": None,
        "error_code": None,
        "error_message": None,
        "started_at": started_at,
        "finished_at": finished_at,
        "duration_ms": 1234,
        "result_checksum": "demo-checksum",
    }


def main() -> int:
    base_url = os.getenv("TABLE_SNIPPET_DEMO_BASE_URL", "http://localhost:8000")
    endpoint = f"{base_url.rstrip('/')}/v1/table/snippet"
    payload = build_demo_payload()

    print(f"POST {endpoint}")
    print(json.dumps(payload, ensure_ascii=False, indent=2))

    try:
        response = requests.post(endpoint, json=payload, timeout=30)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}", file=sys.stderr)
        return 1

    print(f"\nStatus: {response.status_code}")

    try:
        data = response.json()
        print("Response JSON:")
        print(json.dumps(data, ensure_ascii=False, indent=2))
    except ValueError:
        print("Response Text:")
        print(response.text)

    return 0 if response.ok else 1


if __name__ == "__main__":
    raise SystemExit(main())
@ -1,142 +0,0 @@
from __future__ import annotations

import os
import random
from pathlib import Path
from typing import Generator
import sys

import pytest
from fastapi.testclient import TestClient
from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError

# Ensure the project root is importable when running directly via python.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from app import db
from app.main import create_app


TEST_USER_ID = 872341
SCHEMA_PATH = Path("file/tableschema/chat.sql")
DEFAULT_MYSQL_URL = "mysql+pymysql://root:12345678@127.0.0.1:3306/data-ge?charset=utf8mb4"


@pytest.fixture(scope="module")
def client() -> Generator[TestClient, None, None]:
    mysql_url = os.getenv("TEST_DATABASE_URL", DEFAULT_MYSQL_URL)
    os.environ["DATABASE_URL"] = mysql_url
    db.get_engine.cache_clear()
    engine = db.get_engine()
    try:
        # Quick connectivity check
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))
    except SQLAlchemyError:
        pytest.skip(f"Cannot connect to MySQL at {mysql_url}")

    # _ensure_chat_schema(engine)

    app = create_app()
    with TestClient(app) as test_client:
        yield test_client

    # cleanup test artifacts
    with engine.begin() as conn:
        # remove retrievals and turns tied to test sessions
        conn.execute(
            text(
                """
                DELETE FROM chat_turn_retrieval
                WHERE turn_id IN (
                    SELECT id FROM chat_turn WHERE session_id IN (SELECT id FROM chat_session WHERE user_id=:uid)
                )
                """
            ),
            {"uid": TEST_USER_ID},
        )
        conn.execute(
            text("DELETE FROM chat_turn WHERE session_id IN (SELECT id FROM chat_session WHERE user_id=:uid)"),
            {"uid": TEST_USER_ID},
        )
        conn.execute(text("DELETE FROM chat_session WHERE user_id=:uid"), {"uid": TEST_USER_ID})
    db.get_engine.cache_clear()


def test_session_lifecycle_mysql(client: TestClient) -> None:
    # Create a session
    resp = client.post("/api/v1/chat/sessions", json={"user_id": TEST_USER_ID})
    assert resp.status_code == 200, resp.text
    session = resp.json()
    session_id = session["id"]
    assert session["status"] == "OPEN"

    # Get session
    assert client.get(f"/api/v1/chat/sessions/{session_id}").status_code == 200

    # List sessions (filter by user)
    resp = client.get("/api/v1/chat/sessions", params={"user_id": TEST_USER_ID})
    assert resp.status_code == 200
    assert any(item["id"] == session_id for item in resp.json())

    # Update status
    resp = client.post(f"/api/v1/chat/sessions/{session_id}/update", json={"status": "PAUSED"})
    assert resp.status_code == 200
    assert resp.json()["status"] == "PAUSED"

    # Close session
    resp = client.post(f"/api/v1/chat/sessions/{session_id}/close")
    assert resp.status_code == 200
    assert resp.json()["status"] == "CLOSED"


def test_turns_and_retrievals_mysql(client: TestClient) -> None:
    session_id = client.post("/api/v1/chat/sessions", json={"user_id": TEST_USER_ID}).json()["id"]
    turn_payload = {
        "user_id": TEST_USER_ID,
        "user_query": "展示昨天订单GMV",
        "intent": "METRIC_QUERY",
        "ast_json": {"select": ["gmv"], "where": {"dt": "yesterday"}},
        "main_metric_ids": [random.randint(1000, 9999)],
        "created_metric_ids": [],
    }
    resp = client.post(f"/api/v1/chat/sessions/{session_id}/turns", json=turn_payload)
    assert resp.status_code == 200, resp.text
    turn = resp.json()
    turn_id = turn["id"]
    assert turn["turn_no"] == 1

    # Fetch turn
    assert client.get(f"/api/v1/chat/turns/{turn_id}").status_code == 200

    # List turns under session
    resp = client.get(f"/api/v1/chat/sessions/{session_id}/turns")
    assert resp.status_code == 200
    assert any(t["id"] == turn_id for t in resp.json())

    # Insert retrievals
    retrievals_payload = {
        "retrievals": [
            {"item_type": "METRIC", "item_id": "metric_foo", "used_in_sql": True, "rank_no": 1},
            {"item_type": "SNIPPET", "item_id": "snpt_bar", "similarity_score": 0.77, "rank_no": 2},
        ]
    }
    resp = client.post(f"/api/v1/chat/turns/{turn_id}/retrievals", json=retrievals_payload)
    assert resp.status_code == 200
    assert resp.json()["inserted"] == 2

    # List retrievals
    resp = client.get(f"/api/v1/chat/turns/{turn_id}/retrievals")
    assert resp.status_code == 200
    items = resp.json()
    assert len(items) == 2
    assert {item["item_type"] for item in items} == {"METRIC", "SNIPPET"}


if __name__ == "__main__":
    import pytest as _pytest

    raise SystemExit(_pytest.main([__file__]))
@ -1,207 +0,0 @@
from __future__ import annotations

import os
import random
from datetime import datetime, timedelta
from pathlib import Path
from typing import Generator, List

import pytest
from fastapi.testclient import TestClient
from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError

# Ensure project root on path for direct execution
ROOT = Path(__file__).resolve().parents[1]
import sys
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from app import db
from app.main import create_app


TEST_USER_ID = 98765
# SCHEMA_PATH = Path("file/tableschema/metrics.sql")
DEFAULT_MYSQL_URL = "mysql+pymysql://root:12345678@127.0.0.1:3306/data-ge?charset=utf8mb4"


# def _run_sql_script(engine, sql_text: str) -> None:
#     """Execute semicolon-terminated SQL statements sequentially."""
#     statements: List[str] = []
#     buffer: List[str] = []
#     for line in sql_text.splitlines():
#         stripped = line.strip()
#         if not stripped or stripped.startswith("--"):
#             continue
#         buffer.append(line)
#         if stripped.endswith(";"):
#             statements.append("\n".join(buffer).rstrip(";"))
#             buffer = []
#     if buffer:
#         statements.append("\n".join(buffer))
#     with engine.begin() as conn:
#         for stmt in statements:
#             conn.execute(text(stmt))


# def _ensure_metric_schema(engine) -> None:
#     if not SCHEMA_PATH.exists():
#         pytest.skip("metrics.sql schema file not found.")
#     raw_sql = SCHEMA_PATH.read_text(encoding="utf-8")
#     raw_sql = raw_sql.replace("CREATE TABLE metric_def", "CREATE TABLE IF NOT EXISTS metric_def")
#     raw_sql = raw_sql.replace("CREATE TABLE metric_schedule", "CREATE TABLE IF NOT EXISTS metric_schedule")
#     raw_sql = raw_sql.replace("CREATE TABLE metric_job_run", "CREATE TABLE IF NOT EXISTS metric_job_run")
#     raw_sql = raw_sql.replace("CREATE TABLE metric_result", "CREATE TABLE IF NOT EXISTS metric_result")
#     _run_sql_script(engine, raw_sql)


@pytest.fixture(scope="module")
def client() -> Generator[TestClient, None, None]:
    mysql_url = os.getenv("TEST_DATABASE_URL", DEFAULT_MYSQL_URL)
    os.environ["DATABASE_URL"] = mysql_url
    db.get_engine.cache_clear()
    engine = db.get_engine()
    try:
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))
    except SQLAlchemyError:
        pytest.skip(f"Cannot connect to MySQL at {mysql_url}")

    # _ensure_metric_schema(engine)

    app = create_app()
    with TestClient(app) as test_client:
        yield test_client

    # cleanup test artifacts
    with engine.begin() as conn:
        conn.execute(text("DELETE FROM metric_result WHERE metric_id IN (SELECT id FROM metric_def WHERE created_by=:uid)"), {"uid": TEST_USER_ID})
        conn.execute(text("DELETE FROM metric_job_run WHERE metric_id IN (SELECT id FROM metric_def WHERE created_by=:uid)"), {"uid": TEST_USER_ID})
        conn.execute(text("DELETE FROM metric_schedule WHERE metric_id IN (SELECT id FROM metric_def WHERE created_by=:uid)"), {"uid": TEST_USER_ID})
        conn.execute(text("DELETE FROM metric_def WHERE created_by=:uid"), {"uid": TEST_USER_ID})
    db.get_engine.cache_clear()


def test_metric_crud_and_schedule_mysql(client: TestClient) -> None:
    code = f"metric_{random.randint(1000,9999)}"
    create_payload = {
        "metric_code": code,
        "metric_name": "订单数",
        "biz_domain": "order",
        "biz_desc": "订单总数",
        "base_sql": "select count(*) as order_cnt from orders",
        "time_grain": "DAY",
        "dim_binding": ["dt"],
        "update_strategy": "FULL",
        "metric_aliases": ["订单量"],
        "created_by": TEST_USER_ID,
    }
    resp = client.post("/api/v1/metrics", json=create_payload)
    assert resp.status_code == 200, resp.text
    metric = resp.json()
    metric_id = metric["id"]
    assert metric["metric_code"] == code

    # Update metric
    resp = client.post(f"/api/v1/metrics/{metric_id}", json={"metric_name": "订单数-更新", "is_active": False})
    assert resp.status_code == 200
    assert resp.json()["is_active"] is False

    # Get metric
    resp = client.get(f"/api/v1/metrics/{metric_id}")
    assert resp.status_code == 200
    assert resp.json()["metric_name"] == "订单数-更新"

    # Create schedule
    resp = client.post(
        "/api/v1/metric-schedules",
        json={"metric_id": metric_id, "cron_expr": "0 2 * * *", "priority": 5, "enabled": True},
    )
    assert resp.status_code == 200, resp.text
    schedule = resp.json()
    schedule_id = schedule["id"]

    # Update schedule
    resp = client.post(f"/api/v1/metric-schedules/{schedule_id}", json={"enabled": False, "retry_times": 1})
    assert resp.status_code == 200
    assert resp.json()["enabled"] is False

    # List schedules for metric
    resp = client.get(f"/api/v1/metrics/{metric_id}/schedules")
    assert resp.status_code == 200
    assert any(s["id"] == schedule_id for s in resp.json())


def test_metric_runs_and_results_mysql(client: TestClient) -> None:
    code = f"gmv_{random.randint(1000,9999)}"
    metric_id = client.post(
        "/api/v1/metrics",
        json={
            "metric_code": code,
            "metric_name": "GMV",
            "biz_domain": "order",
            "base_sql": "select sum(pay_amount) as gmv from orders",
            "time_grain": "DAY",
            "dim_binding": ["dt"],
            "update_strategy": "FULL",
            "created_by": TEST_USER_ID,
        },
    ).json()["id"]

    # Trigger run
    resp = client.post(
        "/api/v1/metric-runs/trigger",
        json={
            "metric_id": metric_id,
            "triggered_by": "API",
            "data_time_from": (datetime.utcnow() - timedelta(days=1)).isoformat(),
            "data_time_to": datetime.utcnow().isoformat(),
        },
    )
    assert resp.status_code == 200, resp.text
    run = resp.json()
    run_id = run["id"]
    assert run["status"] == "RUNNING"

    # List runs
    resp = client.get("/api/v1/metric-runs", params={"metric_id": metric_id})
    assert resp.status_code == 200
    assert any(r["id"] == run_id for r in resp.json())

    # Get run
    resp = client.get(f"/api/v1/metric-runs/{run_id}")
    assert resp.status_code == 200

    # Write results
    now = datetime.utcnow()
    resp = client.post(
        f"/api/v1/metric-results/{metric_id}",
        json={
            "metric_id": metric_id,
            "results": [
                {"stat_time": (now - timedelta(days=1)).isoformat(), "metric_value": 123.45, "data_version": run_id},
                {"stat_time": now.isoformat(), "metric_value": 234.56, "data_version": run_id},
            ],
        },
    )
    assert resp.status_code == 200, resp.text
    assert resp.json()["inserted"] == 2

    # Query results
    resp = client.get("/api/v1/metric-results", params={"metric_id": metric_id})
    assert resp.status_code == 200
    results = resp.json()
    assert len(results) >= 2

    # Latest result
    resp = client.get("/api/v1/metric-results/latest", params={"metric_id": metric_id})
    assert resp.status_code == 200
    latest = resp.json()
    assert float(latest["metric_value"]) in {123.45, 234.56}


if __name__ == "__main__":
    import pytest as _pytest

    raise SystemExit(_pytest.main([__file__]))
@ -1,91 +0,0 @@
from __future__ import annotations

import json

import httpx
import pytest

from app.exceptions import ProviderAPICallError
from app.schemas.rag import RagDeleteRequest, RagItemPayload, RagRetrieveRequest
from app.services.rag_client import RagAPIClient


@pytest.mark.asyncio
async def test_add_sends_payload_and_headers() -> None:
    rag_client = RagAPIClient(base_url="http://rag.test", auth_token="secret-token")

    def handler(request: httpx.Request) -> httpx.Response:
        assert request.method == "POST"
        assert str(request.url) == "http://rag.test/rag/add"
        assert request.headers["Authorization"] == "Bearer secret-token"
        payload = json.loads(request.content.decode())
        assert payload == {
            "id": 1,
            "workspaceId": 2,
            "name": "demo",
            "embeddingData": "vector",
            "type": "METRIC",
        }
        return httpx.Response(200, json={"ok": True, "echo": payload})

    transport = httpx.MockTransport(handler)
    async with httpx.AsyncClient(transport=transport) as client:
        result = await rag_client.add(
            client,
            RagItemPayload(id=1, workspaceId=2, name="demo", embeddingData="vector", type="METRIC"),
        )
    assert result["ok"] is True
    assert result["echo"]["name"] == "demo"


@pytest.mark.asyncio
async def test_add_batch_serializes_list() -> None:
    rag_client = RagAPIClient(base_url="http://rag.test", auth_token=None)

    def handler(request: httpx.Request) -> httpx.Response:
        payload = json.loads(request.content.decode())
        assert request.url.path == "/rag/addBatch"
        assert isinstance(payload, list) and len(payload) == 2
        return httpx.Response(200, json={"received": len(payload)})

    items = [
        RagItemPayload(id=1, workspaceId=2, name="a", embeddingData="vec-a", type="METRIC"),
        RagItemPayload(id=2, workspaceId=2, name="b", embeddingData="vec-b", type="METRIC"),
    ]
    transport = httpx.MockTransport(handler)
    async with httpx.AsyncClient(transport=transport) as client:
        result = await rag_client.add_batch(client, items)
    assert result == {"received": 2}


@pytest.mark.asyncio
async def test_http_error_raises_provider_error() -> None:
    rag_client = RagAPIClient(base_url="http://rag.test")

    def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(500, text="boom")

    transport = httpx.MockTransport(handler)
    async with httpx.AsyncClient(transport=transport) as client:
        with pytest.raises(ProviderAPICallError) as excinfo:
            await rag_client.delete(client, RagDeleteRequest(id=1, type="METRIC"))

    err = excinfo.value
    assert err.status_code == 500
    assert "boom" in (err.response_text or "")


@pytest.mark.asyncio
async def test_non_json_response_returns_raw_text() -> None:
    rag_client = RagAPIClient(base_url="http://rag.test")

    def handler(request: httpx.Request) -> httpx.Response:
        return httpx.Response(200, text="plain-text-body")

    transport = httpx.MockTransport(handler)
    async with httpx.AsyncClient(transport=transport) as client:
        result = await rag_client.retrieve(
            client, RagRetrieveRequest(query="foo", num=1, workspaceId=1, type="METRIC")
        )
    assert result == {"raw": "plain-text-body"}
@ -1,157 +0,0 @@
from __future__ import annotations

import json
from datetime import datetime

import httpx
import pytest
from sqlalchemy import create_engine, text

from app.services.table_snippet import ingest_snippet_rag_from_db


def _setup_sqlite_engine():
    engine = create_engine("sqlite://")
    with engine.begin() as conn:
        conn.execute(
            text(
                """
                CREATE TABLE action_results (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    table_id INTEGER,
                    version_ts INTEGER,
                    action_type TEXT,
                    status TEXT,
                    snippet_json TEXT,
                    snippet_alias_json TEXT,
                    updated_at TEXT
                )
                """
            )
        )
        conn.execute(
            text(
                """
                CREATE TABLE rag_snippet (
                    rag_item_id INTEGER PRIMARY KEY,
                    action_result_id INTEGER NOT NULL,
                    workspace_id INTEGER,
                    table_id INTEGER,
                    version_ts INTEGER,
                    created_at TEXT,
                    snippet_id TEXT,
                    rag_text TEXT,
                    merged_json TEXT,
                    updated_at TEXT
                )
                """
            )
        )
    return engine


def _insert_action_row(engine, payload: dict) -> None:
    with engine.begin() as conn:
        conn.execute(
            text(
                """
                INSERT INTO action_results (table_id, version_ts, action_type, status, snippet_json, snippet_alias_json, updated_at)
                VALUES (:table_id, :version_ts, :action_type, :status, :snippet_json, :snippet_alias_json, :updated_at)
                """
            ),
            {
                "table_id": payload["table_id"],
                "version_ts": payload["version_ts"],
                "action_type": payload["action_type"],
                "status": payload.get("status", "success"),
                "snippet_json": json.dumps(payload.get("snippet_json"), ensure_ascii=False)
                if payload.get("snippet_json") is not None
                else None,
                "snippet_alias_json": json.dumps(payload.get("snippet_alias_json"), ensure_ascii=False)
                if payload.get("snippet_alias_json") is not None
                else None,
                "updated_at": payload.get("updated_at") or datetime.utcnow().isoformat(),
            },
        )


class _StubRagClient:
    def __init__(self) -> None:
        self.received = None

    async def add_batch(self, _client, items):
        self.received = items
        return {"count": len(items)}


@pytest.mark.asyncio
async def test_ingest_snippet_rag_from_db_persists_and_calls_rag_client() -> None:
    engine = _setup_sqlite_engine()
    table_id = 321
    version_ts = 20240102000000

    snippet_payload = [
        {
            "id": "snpt_topn",
            "title": "TopN",
            "aliases": [{"text": "站点水表排行前N", "tone": "中性"}],
            "keywords": ["TopN", "站点"],
        }
    ]
    alias_payload = [
        {
            "id": "snpt_topn",
            "aliases": [
                {"text": "站点水表排行前N", "tone": "中性"},
                {"text": "按站点水表TopN", "tone": "专业"},
            ],
            "keywords": ["TopN", "排行"],
            "intent_tags": ["topn", "aggregate"],
        },
        {
            "id": "snpt_extra",
            "aliases": [{"text": "额外别名"}],
            "keywords": ["extra"],
        },
    ]

    _insert_action_row(
        engine,
        {
            "table_id": table_id,
            "version_ts": version_ts,
            "action_type": "snippet_alias",
            "snippet_json": snippet_payload,
            "snippet_alias_json": alias_payload,
            "updated_at": "2024-01-02T00:00:00",
        },
    )

    rag_stub = _StubRagClient()
    async with httpx.AsyncClient() as client:
        rag_ids = await ingest_snippet_rag_from_db(
            table_id=table_id,
            version_ts=version_ts,
            workspace_id=99,
            rag_item_type="SNIPPET",
            client=client,
            engine=engine,
            rag_client=rag_stub,
        )

    assert rag_stub.received is not None
    assert len(rag_stub.received) == 2  # includes the alias-only row
    assert len(rag_ids) == 2

    with engine.connect() as conn:
        rows = list(
            conn.execute(
                text("SELECT snippet_id, action_result_id, rag_text, merged_json FROM rag_snippet ORDER BY snippet_id")
            )
        )
    assert {row[0] for row in rows} == {"snpt_extra", "snpt_topn"}
    assert all(row[1] is not None for row in rows)
    topn_row = next(row for row in rows if row[0] == "snpt_topn")
    assert "TopN" in topn_row[2]
    assert "按站点水表TopN" in topn_row[2]
    assert "排行" in topn_row[2]
@ -1,74 +0,0 @@
from __future__ import annotations

from app.services.table_profiling import _parse_completion_payload
from app.utils.llm_usage import extract_usage


def test_parse_completion_payload_handles_array_with_trailing_text() -> None:
    response_payload = {
        "choices": [
            {
                "message": {
                    "content": """
结果如下:
[
  {"id": "snpt_a"},
  {"id": "snpt_b"}
]
附加说明:模型可能会输出额外文本。
""".strip()
                }
            }
        ]
    }

    parsed = _parse_completion_payload(response_payload)

    assert isinstance(parsed, list)
    assert [item["id"] for item in parsed] == ["snpt_a", "snpt_b"]


def test_extract_usage_info_normalizes_numeric_fields() -> None:
    response_payload = {
        "raw": {
            "usage": {
                "prompt_tokens": 12.7,
                "completion_tokens": 3,
                "total_tokens": 15.7,
                "prompt_tokens_details": {"cached_tokens": 8.9, "other": None},
                "non_numeric": "ignored",
            }
        }
    }

    usage = extract_usage(response_payload)

    assert usage == {
        "prompt_tokens": 12,
        "completion_tokens": 3,
        "total_tokens": 15,
        "prompt_tokens_details": {"cached_tokens": 8},
    }


def test_extract_usage_handles_alias_keys() -> None:
    response_payload = {
        "raw": {
            "usageMetadata": {
                "input_tokens": 20,
                "output_tokens": 4,
            }
        }
    }

    usage = extract_usage(response_payload)

    assert usage == {
        "prompt_tokens": 20,
        "completion_tokens": 4,
        "total_tokens": 24,
    }


def test_extract_usage_returns_none_when_missing() -> None:
    assert extract_usage({"raw": {}}) is None
@ -1,213 +0,0 @@
from __future__ import annotations

import json
import os
import random
from datetime import datetime, timedelta
from typing import List
from pathlib import Path

import sys
import pytest
from sqlalchemy import text
from sqlalchemy.engine import Engine
from sqlalchemy.exc import SQLAlchemyError

# Ensure the project root is importable when running directly via python.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from app import db
from app.main import create_app


from app.services.table_snippet import merge_snippet_records_from_db


DEFAULT_MYSQL_URL = "mysql+pymysql://root:12345678@127.0.0.1:3306/data-ge?charset=utf8mb4"


@pytest.fixture()
def mysql_engine() -> Engine:
    mysql_url = os.getenv("TEST_DATABASE_URL", DEFAULT_MYSQL_URL)
    os.environ["DATABASE_URL"] = mysql_url
    db.get_engine.cache_clear()
    engine = db.get_engine()
    try:
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))
            exists = conn.execute(text("SHOW TABLES LIKE 'action_results'")).scalar()
            if not exists:
                pytest.skip("action_results table not found in test database.")
    except SQLAlchemyError:
        pytest.skip(f"Cannot connect to MySQL at {mysql_url}")
    return engine


def _insert_action_row(
    engine: Engine,
    *,
    table_id: int,
    version_ts: int,
    action_type: str,
    status: str = "success",
    snippet_json: List[dict] | None = None,
    snippet_alias_json: List[dict] | None = None,
    updated_at: datetime | None = None,
) -> None:
    snippet_json_str = json.dumps(snippet_json, ensure_ascii=False) if snippet_json is not None else None
    snippet_alias_json_str = (
        json.dumps(snippet_alias_json, ensure_ascii=False) if snippet_alias_json is not None else None
    )
    with engine.begin() as conn:
        conn.execute(
            text(
                """
                INSERT INTO action_results (
                    table_id, version_ts, action_type, status,
                    callback_url, table_schema_version_id, table_schema,
                    snippet_json, snippet_alias_json, updated_at
                ) VALUES (
                    :table_id, :version_ts, :action_type, :status,
                    :callback_url, :table_schema_version_id, :table_schema,
                    :snippet_json, :snippet_alias_json, :updated_at
                )
                ON DUPLICATE KEY UPDATE
                    status=VALUES(status),
                    snippet_json=VALUES(snippet_json),
                    snippet_alias_json=VALUES(snippet_alias_json),
                    updated_at=VALUES(updated_at)
                """
            ),
            {
                "table_id": table_id,
                "version_ts": version_ts,
                "action_type": action_type,
                "status": status,
                "callback_url": "http://localhost/test-callback",
                "table_schema_version_id": "1",
                "table_schema": json.dumps({}, ensure_ascii=False),
                "snippet_json": snippet_json_str,
                "snippet_alias_json": snippet_alias_json_str,
                "updated_at": updated_at or datetime.utcnow(),
            },
        )


def _cleanup(engine: Engine, table_id: int, version_ts: int) -> None:
    with engine.begin() as conn:
        conn.execute(
            text("DELETE FROM action_results WHERE table_id=:table_id AND version_ts=:version_ts"),
            {"table_id": table_id, "version_ts": version_ts},
        )


def test_merge_prefers_alias_row_and_appends_alias_only_entries(mysql_engine: Engine) -> None:
    table_id = 990000000 + random.randint(1, 9999)
    version_ts = int(datetime.utcnow().strftime("%Y%m%d%H%M%S"))
    alias_updated = datetime(2024, 1, 2, 0, 0, 0)

    snippet_payload = [
        {
            "id": "snpt_topn",
            "aliases": [{"text": "站点水表排行前N", "tone": "中性"}],
            "keywords": ["TopN", "站点"],
        }
    ]
    alias_payload = [
        {
            "id": "snpt_topn",
            "aliases": [
                {"text": "站点水表排行前N", "tone": "中性"},
                {"text": "按站点水表TopN", "tone": "专业"},
            ],
            "keywords": ["TopN", "排行"],
            "intent_tags": ["topn", "aggregate"],
        },
        {
            "id": "snpt_extra",
            "aliases": [{"text": "额外别名"}],
            "keywords": ["extra"],
        },
    ]

    _insert_action_row(
        mysql_engine,
        table_id=table_id,
        version_ts=version_ts,
        action_type="snippet_alias",
        snippet_json=snippet_payload,
        snippet_alias_json=alias_payload,
        updated_at=alias_updated,
    )

    try:
        merged = merge_snippet_records_from_db(table_id, version_ts, engine=mysql_engine)
        assert len(merged) == 2
        topn = next(item for item in merged if item["id"] == "snpt_topn")
        assert topn["source"] == "snippet"
        assert topn["updated_at_from_action"] == alias_updated
        assert {a["text"] for a in topn["aliases"]} == {"站点水表排行前N", "按站点水表TopN"}
        assert set(topn["keywords"]) == {"TopN", "站点", "排行"}
        assert set(topn["intent_tags"]) == {"topn", "aggregate"}

        alias_only = next(item for item in merged if item["source"] == "alias_only")
        assert alias_only["id"] == "snpt_extra"
        assert alias_only["aliases"][0]["text"] == "额外别名"
    finally:
        _cleanup(mysql_engine, table_id, version_ts)


def test_merge_falls_back_to_snippet_row_when_alias_row_missing_snippet_json(mysql_engine: Engine) -> None:
    table_id = 991000000 + random.randint(1, 9999)
    version_ts = int((datetime.utcnow() + timedelta(seconds=1)).strftime("%Y%m%d%H%M%S"))

    alias_updated = datetime(2024, 1, 3, 0, 0, 0)
    alias_payload = [
        {
            "id": "snpt_quality",
            "aliases": [{"text": "质量检查"}],
            "keywords": ["quality"],
        }
    ]
    snippet_payload = [
        {
            "id": "snpt_quality",
            "title": "质量检查",
            "keywords": ["data-quality"],
            "aliases": [{"text": "质量检查"}],
        }
    ]

    _insert_action_row(
        mysql_engine,
        table_id=table_id,
        version_ts=version_ts,
        action_type="snippet_alias",
        snippet_json=None,
        snippet_alias_json=alias_payload,
        updated_at=alias_updated,
    )
    _insert_action_row(
        mysql_engine,
        table_id=table_id,
        version_ts=version_ts,
        action_type="snippet",
        snippet_json=snippet_payload,
        snippet_alias_json=None,
        updated_at=datetime(2024, 1, 2, 0, 0, 0),
    )

    try:
        merged = merge_snippet_records_from_db(table_id, version_ts, engine=mysql_engine)

        assert len(merged) == 1
        record = merged[0]
        assert record["id"] == "snpt_quality"
        assert record["source"] == "snippet"
        assert record["updated_at_from_action"] == alias_updated
        assert set(record["keywords"]) == {"data-quality", "quality"}
        assert {a["text"] for a in record["aliases"]} == {"质量检查"}
    finally:
        _cleanup(mysql_engine, table_id, version_ts)
13
uv.lock
generated
13
uv.lock
generated
@ -1,13 +0,0 @@
version = 1
revision = 1
requires-python = ">=3.11"
resolution-markers = [
    "python_full_version >= '3.14'",
    "python_full_version >= '3.12' and python_full_version < '3.14'",
    "python_full_version < '3.12'",
]

[[package]]
name = "data-ge-new"
version = "0.1.0"
source = { virtual = "." }