Switch LLM calls to go through new-api
.env
@@ -17,7 +17,7 @@ DEFAULT_IMPORT_MODEL=deepseek:deepseek-chat
 IMPORT_GATEWAY_BASE_URL=http://localhost:8000
 
 # HTTP client configuration
-HTTP_CLIENT_TIMEOUT=60
+HTTP_CLIENT_TIMEOUT=120
 HTTP_CLIENT_TRUST_ENV=false
 # HTTP_CLIENT_PROXY=
 
@@ -27,3 +27,5 @@ IMPORT_CHAT_TIMEOUT_SECONDS=120
 # Logging
 LOG_LEVEL=INFO
 # LOG_FORMAT=%(asctime)s %(levelname)s %(name)s:%(lineno)d %(message)s
+NEW_API_BASE_URL=http://localhost:3000
+NEW_API_AUTH_TOKEN="sk-Q79KGFJRs5Vk9HsfFqoiJk948uLMDhAVe037AeCb31URyWGL"
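For a quick smoke test of these two new settings, here is a minimal sketch; the endpoint path and payload shape mirror the gateway code later in this commit, and the model name is only illustrative:

import os

import httpx

# Read the variables introduced above; both fall back to values suitable for local use.
base_url = os.getenv("NEW_API_BASE_URL", "http://localhost:3000").rstrip("/")
token = os.getenv("NEW_API_AUTH_TOKEN")

headers = {"Content-Type": "application/json"}
if token:
    headers["Authorization"] = f"Bearer {token}"

payload = {
    "model": "deepseek-chat",  # illustrative; use whatever model new-api is configured to serve
    "messages": [{"role": "user", "content": "ping"}],
}
resp = httpx.post(f"{base_url}/v1/chat/completions", json=payload, headers=headers, timeout=30.0)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])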
@@ -2,7 +2,7 @@
 
 This project exposes a FastAPI-based microservice that provides:
 
-- A unified chat completions gateway supporting multiple LLM providers (OpenAI, Anthropic, OpenRouter, Gemini, Qwen, DeepSeek, etc.)
+- A unified chat completions gateway that now forwards requests to the internal `new-api` service (default `http://localhost:3000`) while preserving the same client-facing schema.
 - An asynchronous data import analysis pipeline that orchestrates LLM calls to produce structured metadata and processing recommendations
 
 The following instructions cover environment setup, dependency installation, and running the backend service.
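Because the client-facing schema is unchanged, existing callers keep posting the same provider/model/messages payload to the service itself. A minimal client sketch follows; field names beyond provider, model, and messages are assumptions, and the port matches the default local uvicorn setup:

import asyncio

import httpx


async def main() -> None:
    # The gateway still accepts the provider-qualified request and forwards it to new-api internally.
    payload = {
        "provider": "deepseek",
        "model": "deepseek-chat",
        "messages": [{"role": "user", "content": "Say hello."}],
    }
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.post("/v1/chat/completions", json=payload, timeout=60.0)
        resp.raise_for_status()
        print(resp.json()["choices"][0]["message"]["content"])


asyncio.run(main())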
@@ -56,6 +56,7 @@ Copy `.env.example` to `.env` (if provided) or edit `.env` to supply API keys an
 - `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `OPENROUTER_API_KEY`, etc.
 - `HTTP_CLIENT_TIMEOUT`, `IMPORT_CHAT_TIMEOUT_SECONDS`
 - `LOG_LEVEL`, `LOG_FORMAT` for logging
+- `NEW_API_BASE_URL` (defaults to `http://localhost:3000`) and optional `NEW_API_AUTH_TOKEN` if the new-api component enforces authentication.
 
 
 ## Run the Backend Service
@@ -84,4 +85,4 @@ Or use a process manager such as `pm2`, `supervisor`, or systemd for production
 
 - Run the data import analysis example: `python test/data_import_analysis_example.py`
 - Test the OpenRouter demo: `python test/openrouter_chat_example.py`
 - Send a DeepSeek chat request script: `python scripts/deepseek_request.py`
@@ -1,53 +1,93 @@
 from __future__ import annotations
 
-import os
-from typing import Dict, Type
+import logging
 
 import httpx
+from pydantic import ValidationError
 
-from app.exceptions import ProviderConfigurationError
-from app.models import LLMProvider, LLMRequest, LLMResponse
-from app.providers import (
-    AnthropicProvider,
-    DeepSeekProvider,
-    GeminiProvider,
-    LLMProviderClient,
-    OpenAIProvider,
-    OpenRouterProvider,
-    QwenProvider,
-)
+from app.exceptions import ProviderAPICallError
+from app.models import LLMChoice, LLMMessage, LLMRequest, LLMResponse
+from app.settings import NEW_API_AUTH_TOKEN, NEW_API_BASE_URL
+
+logger = logging.getLogger(__name__)
 
 
 class LLMGateway:
-    """Simple registry that dispatches chat requests to provider clients."""
+    """Forward chat requests to the configured new-api component."""
 
-    def __init__(self) -> None:
-        self._providers: Dict[LLMProvider, LLMProviderClient] = {}
-        self._factory: Dict[LLMProvider, Type[LLMProviderClient]] = {
-            LLMProvider.OPENAI: OpenAIProvider,
-            LLMProvider.ANTHROPIC: AnthropicProvider,
-            LLMProvider.OPENROUTER: OpenRouterProvider,
-            LLMProvider.GEMINI: GeminiProvider,
-            LLMProvider.QWEN: QwenProvider,
-            LLMProvider.DEEPSEEK: DeepSeekProvider,
-        }
-
-    def get_provider(self, provider: LLMProvider) -> LLMProviderClient:
-        if provider not in self._factory:
-            raise ProviderConfigurationError(f"Unsupported provider '{provider.value}'.")
-
-        if provider not in self._providers:
-            self._providers[provider] = self._build_provider(provider)
-        return self._providers[provider]
-
-    def _build_provider(self, provider: LLMProvider) -> LLMProviderClient:
-        provider_cls = self._factory[provider]
-        api_key_env = getattr(provider_cls, "api_key_env", None)
-        api_key = os.getenv(api_key_env) if api_key_env else None
-        return provider_cls(api_key)
+    def __init__(
+        self,
+        *,
+        base_url: str | None = None,
+        auth_token: str | None = None,
+    ) -> None:
+        resolved_base = base_url or NEW_API_BASE_URL
+        self._base_url = resolved_base.rstrip("/")
+        self._auth_token = auth_token or NEW_API_AUTH_TOKEN
 
     async def chat(
         self, request: LLMRequest, client: httpx.AsyncClient
     ) -> LLMResponse:
-        provider_client = self.get_provider(request.provider)
-        return await provider_client.chat(request, client)
+        url = f"{self._base_url}/v1/chat/completions"
+        payload = request.model_dump(mode="json", exclude_none=True)
+        headers = {"Content-Type": "application/json"}
+        if self._auth_token:
+            headers["Authorization"] = f"Bearer {self._auth_token}"
+        logger.info("Forwarding chat request to new-api at %s", url)
+        try:
+            response = await client.post(url, json=payload, headers=headers)
+            response.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            status_code = exc.response.status_code if exc.response else None
+            response_text = exc.response.text if exc.response else ""
+            logger.error(
+                "new-api upstream returned %s: %s",
+                status_code,
+                response_text,
+                exc_info=True,
+            )
+            raise ProviderAPICallError(
+                "Chat completion request failed.",
+                status_code=status_code,
+                response_text=response_text,
+            ) from exc
+        except httpx.HTTPError as exc:
+            logger.error("new-api transport error: %s", exc, exc_info=True)
+            raise ProviderAPICallError(f"Chat completion request failed: {exc}") from exc
+
+        try:
+            data = response.json()
+        except ValueError as exc:
+            logger.error("new-api responded with invalid JSON.", exc_info=True)
+            raise ProviderAPICallError(
+                "Chat completion response was not valid JSON."
+            ) from exc
+
+        logger.info("new-api payload: %s", data)
+        normalized_choices: list[LLMChoice] = []
+        for idx, choice in enumerate(data.get("choices", []) or []):
+            message_payload = choice.get("message") or {}
+            message = LLMMessage(
+                role=message_payload.get("role", "assistant"),
+                content=message_payload.get("content", ""),
+            )
+            normalized_choices.append(
+                LLMChoice(index=choice.get("index", idx), message=message)
+            )
+
+        try:
+            normalized_response = LLMResponse(
+                provider=request.provider,
+                model=data.get("model", request.model),
+                choices=normalized_choices,
+                raw=data,
+            )
+            return normalized_response
+        except ValidationError as exc:
+            logger.error(
+                "new-api response did not match expected schema: %s", data, exc_info=True
+            )
+            raise ProviderAPICallError(
+                "Chat completion response was not in the expected format."
+            ) from exc
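A small usage sketch of the rewritten gateway; the exact LLMRequest constructor fields are an assumption inferred from the imports and payload shapes elsewhere in this diff:

import asyncio

import httpx

from app.models import LLMMessage, LLMProvider, LLMRequest
from app.services import LLMGateway


async def main() -> None:
    # Explicit overrides; with no arguments the gateway falls back to NEW_API_BASE_URL / NEW_API_AUTH_TOKEN.
    gateway = LLMGateway(base_url="http://localhost:3000", auth_token=None)
    request = LLMRequest(
        provider=LLMProvider.DEEPSEEK,
        model="deepseek-chat",
        messages=[LLMMessage(role="user", content="hello")],
    )
    async with httpx.AsyncClient() as client:
        response = await gateway.chat(request, client)
        print(response.choices[0].message.content)


asyncio.run(main())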
@@ -22,14 +22,24 @@ from app.models import (
     LLMResponse,
     LLMRole,
 )
-from app.settings import DEFAULT_IMPORT_MODEL, get_supported_import_models
+from app.settings import (
+    DEFAULT_IMPORT_MODEL,
+    NEW_API_AUTH_TOKEN,
+    NEW_API_BASE_URL,
+    get_supported_import_models,
+)
 from app.utils.llm_usage import extract_usage
 
 logger = logging.getLogger(__name__)
 
-IMPORT_GATEWAY_BASE_URL = os.getenv(
-    "IMPORT_GATEWAY_BASE_URL", "http://localhost:8000"
-)
+IMPORT_GATEWAY_BASE_URL = os.getenv("IMPORT_GATEWAY_BASE_URL", NEW_API_BASE_URL)
+
+
+def build_import_gateway_headers() -> dict[str, str]:
+    headers = {"Content-Type": "application/json"}
+    if NEW_API_AUTH_TOKEN:
+        headers["Authorization"] = f"Bearer {NEW_API_AUTH_TOKEN}"
+    return headers
 
 
 def _env_float(name: str, default: float) -> float:
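The helper added here centralizes the optional bearer token for all calls through the import gateway. Illustratively:

from app.services.import_analysis import build_import_gateway_headers

# With NEW_API_AUTH_TOKEN unset:  {"Content-Type": "application/json"}
# With NEW_API_AUTH_TOKEN set:    {"Content-Type": "application/json", "Authorization": "Bearer <token>"}
print(build_import_gateway_headers())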
@@ -314,16 +324,18 @@ async def dispatch_import_analysis_job(
     url = f"{IMPORT_GATEWAY_BASE_URL.rstrip('/')}/v1/chat/completions"
 
     logger.info(
-        "Dispatching import %s to %s: %s",
+        "Dispatching import %s to %s using provider=%s model=%s",
         request.import_record_id,
         url,
-        json.dumps(payload, ensure_ascii=False),
+        payload.get("provider"),
+        payload.get("model"),
     )
 
     timeout = httpx.Timeout(IMPORT_CHAT_TIMEOUT_SECONDS)
+    headers = build_import_gateway_headers()
 
     try:
-        response = await client.post(url, json=payload, timeout=timeout)
+        response = await client.post(url, json=payload, timeout=timeout, headers=headers)
         response.raise_for_status()
     except httpx.HTTPStatusError as exc:
         body_preview = ""
@@ -348,9 +360,10 @@ async def dispatch_import_analysis_job(
         response.status_code,
     )
     logger.info(
-        "LLM response for %s: %s",
+        "LLM response received for %s (status %s, choices=%s)",
         request.import_record_id,
-        json.dumps(response_data, ensure_ascii=False),
+        response.status_code,
+        len(response_data.get("choices") or []),
     )
 
     try:
@@ -404,6 +417,7 @@ async def process_import_analysis_job(
     request: DataImportAnalysisJobRequest,
     client: httpx.AsyncClient,
 ) -> None:
+    # Run the import analysis and ensure the callback fires regardless of success/failure.
     try:
         payload = await dispatch_import_analysis_job(request, client)
     except ProviderAPICallError as exc:
@@ -24,6 +24,7 @@ from app.services import LLMGateway
 from app.settings import DEFAULT_IMPORT_MODEL
 from app.services.import_analysis import (
     IMPORT_GATEWAY_BASE_URL,
+    build_import_gateway_headers,
     resolve_provider_from_model,
 )
 from app.utils.llm_usage import extract_usage as extract_llm_usage
@@ -532,6 +533,7 @@ async def _call_chat_completions(
     temperature: float = 0.2,
     timeout_seconds: Optional[float] = None,
 ) -> Any:
+    # Normalize model spec to provider+model and issue the unified chat call.
     provider, model_name = resolve_provider_from_model(model_spec)
     payload = {
         "provider": provider.value,
@@ -545,16 +547,17 @@
     payload_size_bytes = len(json.dumps(payload, ensure_ascii=False).encode("utf-8"))
 
     url = f"{IMPORT_GATEWAY_BASE_URL.rstrip('/')}/v1/chat/completions"
+    headers = build_import_gateway_headers()
     try:
-        # log the request whole info
         logger.info(
-            "Calling chat completions API %s with model %s and size %s and payload %s",
+            "Calling chat completions API %s with model=%s payload_size=%sB",
             url,
             model_name,
             payload_size_bytes,
-            payload,
         )
-        response = await client.post(url, json=payload, timeout=timeout_seconds)
+        response = await client.post(
+            url, json=payload, timeout=timeout_seconds, headers=headers
+        )
 
         response.raise_for_status()
     except httpx.HTTPError as exc:
@@ -703,6 +706,7 @@ async def _run_action_with_callback(
     input_payload: Any = None,
     model_spec: Optional[str] = None,
 ) -> Any:
+    # Execute a pipeline action and always emit a callback capturing success/failure.
     if input_payload is not None:
         logger.info(
             "Pipeline action %s input: %s",
@@ -20,7 +20,11 @@ PROVIDER_KEY_ENV_MAP: Dict[str, str] = {
 }
 
 
-DEFAULT_IMPORT_MODEL = os.getenv("DEFAULT_IMPORT_MODEL", "openai:gpt-4.1-mini")
+DEFAULT_IMPORT_MODEL = os.getenv("DEFAULT_IMPORT_MODEL", "deepseek:deepseek-chat")
+NEW_API_BASE_URL = os.getenv("NEW_API_BASE_URL")
+NEW_API_AUTH_TOKEN = os.getenv("NEW_API_AUTH_TOKEN")
+RAG_API_BASE_URL = os.getenv("RAG_API_BASE_URL", "http://127.0.0.1:8000")
+RAG_API_AUTH_TOKEN = os.getenv("RAG_API_AUTH_TOKEN")
 
 
 @lru_cache(maxsize=1)
project.md (new file)
@@ -0,0 +1,23 @@
+Project structure and logic
+
+app/main.py: creates the FastAPI application and its lifespan, initializes the shared httpx.AsyncClient and LLMGateway, applies unified exception handling, and exposes four endpoints: chat proxy, import analysis, table profiling pipeline, and table snippet persistence.
+app/models.py: defines all request/response models and enums (LLM requests, import analysis jobs, table profiling jobs, snippet persistence, etc.) together with field validation and defaults.
+app/services: core business logic
+gateway.py forwards /v1/chat/completions requests to NEW_API_BASE_URL (with an optional Bearer token) and normalizes the response.
+import_analysis.py assembles the import prompt (prompt/data_import_analysis.md), parses/truncates the sample data, calls the unified chat endpoint, extracts the JSON result and token usage, and finally calls the business side back.
+table_profiling.py runs a four-step pipeline in sequence: Great Expectations profiling → LLM description of the results (prompt/ge_result_desc_prompt.md) → snippet generation (prompt/snippet_generator.md) → snippet aliases (prompt/snippet_alias_generator.md), with a status/result callback after every step.
+table_snippet.py upserts each step's results into the database table, automatically serializing JSON/size information and building the INSERT ... ON DUPLICATE KEY UPDATE statement.
+app/providers/*: direct clients for each vendor (OpenAI/Anthropic/OpenRouter/Gemini/Qwen/DeepSeek) implementing the unified chat interface; the main flow now forwards through new-api, but the direct clients are kept.
+prompt/ holds the prompt templates; scripts/ and test/ provide API call examples and regression samples; table_snippet.sql defines the action_results table used to persist snippets and profiling results.
+Features and requirements
+
+LLM gateway: POST /v1/chat/completions accepts an LLMRequest (provider + model + messages, etc.) and passes the payload through to NEW_API_BASE_URL/v1/chat/completions with optional NEW_API_AUTH_TOKEN authentication; on failure it returns 4xx/5xx and logs the raw response.
+Import analysis (async): POST /v1/import/analyze accepts import samples (rows/headers/raw_csv/table_schema), a target model llm_model (default DEFAULT_IMPORT_MODEL, optionally restricted by the IMPORT_SUPPORTED_MODELS whitelist), a temperature, and a callback URL. The service converts the samples to CSV, appends the schema, concatenates the system and user messages, calls the unified chat endpoint, parses the JSON in the first choice as the analysis result, and returns it together with LLM usage via the callback; on failure the callback carries status=failed and the error message.
+Table profiling pipeline (async): POST /v1/table/profiling accepts the table identifier, version, callback URL, and GE/LLM configuration (datasource/batch_request, connection string template, LLM model and timeout). The pipeline runs, in order:
+Great Expectations profiling (configurable profiler type, datasource, runtime SQL query/table), producing the full and summary JSON plus the Data Docs path;
+a chat call that produces the GE result description JSON;
+generation of the SQL snippet array from that description;
+generation of snippet aliases/keywords.
+Every step issues a success/failure callback whose payload includes the action_type, result JSON, model, llm_usage, error details, and so on.
+Snippet persistence: POST /v1/table/snippet accepts a TableSnippetUpsertRequest (table/version, action type, status, schema, model info, per-stage JSON and sizes, error code, timestamps, etc.), maps it onto the action_results table as an UPSERT, and reports whether an existing record was updated.
+Configuration and runtime requirements: the core environment variables live in app/settings.py (API keys, DEFAULT_IMPORT_MODEL, IMPORT_GATEWAY_BASE_URL/NEW_API_BASE_URL, the model whitelist, database URL, etc.); logging uses logging.yaml and creates logs/ automatically; HTTP client timeout/proxy behavior is controlled via HTTP_CLIENT_TIMEOUT, HTTP_CLIENT_TRUST_ENV, HTTP_CLIENT_PROXY. For local debugging use uvicorn app.main:app --reload; Docker support comes from Dockerfile/docker-compose.yml.
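To make the import-analysis contract above concrete, a hedged sketch of a request to the async endpoint; the field names follow the description above, while callback_url and the exact body layout are assumptions:

import httpx

# Illustrative request to the async import analysis endpoint described above.
job = {
    "import_record_id": "demo-001",          # assumed identifier field, per the logging in this diff
    "llm_model": "deepseek:deepseek-chat",   # provider-qualified model spec; defaults to DEFAULT_IMPORT_MODEL
    "headers": ["name", "age"],
    "rows": [["Alice", "30"], ["Bob", "41"]],
    "callback_url": "http://localhost:9000/import/callback",  # assumed field name
}
resp = httpx.post("http://localhost:8000/v1/import/analyze", json=job, timeout=30.0)
print(resp.status_code)  # the analysis result itself arrives later via the callback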