恢复GE版本为0.18,生成SNIPPET后自动生成rag_text流程
This commit is contained in:
37
app/main.py
37
app/main.py
@ -24,6 +24,8 @@ from app.models import (
|
|||||||
LLMResponse,
|
LLMResponse,
|
||||||
TableProfilingJobAck,
|
TableProfilingJobAck,
|
||||||
TableProfilingJobRequest,
|
TableProfilingJobRequest,
|
||||||
|
TableSnippetRagIngestRequest,
|
||||||
|
TableSnippetRagIngestResponse,
|
||||||
TableSnippetUpsertRequest,
|
TableSnippetUpsertRequest,
|
||||||
TableSnippetUpsertResponse,
|
TableSnippetUpsertResponse,
|
||||||
)
|
)
|
||||||
@ -252,6 +254,7 @@ def create_app() -> FastAPI:
|
|||||||
)
|
)
|
||||||
raise HTTPException(status_code=500, detail=str(exc)) from exc
|
raise HTTPException(status_code=500, detail=str(exc)) from exc
|
||||||
else:
|
else:
|
||||||
|
# After snippet_alias is stored, automatically trigger RAG ingest when configured.
|
||||||
if (
|
if (
|
||||||
payload.action_type == ActionType.SNIPPET_ALIAS
|
payload.action_type == ActionType.SNIPPET_ALIAS
|
||||||
and payload.status == ActionStatus.SUCCESS
|
and payload.status == ActionStatus.SUCCESS
|
||||||
@ -267,14 +270,46 @@ def create_app() -> FastAPI:
|
|||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception(
|
logger.exception(
|
||||||
"Failed to ingest snippet RAG artifacts",
|
"Failed to ingest snippet RAG artifacts after snippet_alias upsert",
|
||||||
extra={
|
extra={
|
||||||
"table_id": payload.table_id,
|
"table_id": payload.table_id,
|
||||||
"version_ts": payload.version_ts,
|
"version_ts": payload.version_ts,
|
||||||
|
"workspace_id": payload.rag_workspace_id,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@application.post(
    "/v1/table/snippet/rag_ingest",
    response_model=TableSnippetRagIngestResponse,
    summary="Merge snippet+alias results from action_results and ingest into RAG.",
)
async def ingest_snippet_rag(
    payload: TableSnippetRagIngestRequest,
    client: httpx.AsyncClient = Depends(get_http_client),
) -> TableSnippetRagIngestResponse:
    """Run snippet RAG ingestion for one table version on demand.

    Delegates to ``ingest_snippet_rag_from_db`` using the identifiers carried
    by ``payload`` and wraps the returned ids in the response model.

    Raises:
        HTTPException: status 500 with the stringified error when the
            ingestion helper raises any exception.
    """
    try:
        ingested_ids = await ingest_snippet_rag_from_db(
            table_id=payload.table_id,
            version_ts=payload.version_ts,
            workspace_id=payload.workspace_id,
            # An explicit null in the request falls back to the default type.
            rag_item_type=payload.rag_item_type or "SNIPPET",
            client=client,
        )
    except Exception as exc:
        # Attach the request identifiers so the failure can be traced in logs.
        log_context = {
            "table_id": payload.table_id,
            "version_ts": payload.version_ts,
            "workspace_id": payload.workspace_id,
        }
        logger.exception("Failed to ingest snippet RAG artifacts", extra=log_context)
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return TableSnippetRagIngestResponse(rag_item_ids=ingested_ids)
|
||||||
|
|
||||||
@application.post("/__mock__/import-callback")
|
@application.post("/__mock__/import-callback")
|
||||||
async def mock_import_callback(payload: dict[str, Any]) -> dict[str, str]:
|
async def mock_import_callback(payload: dict[str, Any]) -> dict[str, str]:
|
||||||
logger.info("Received import analysis callback: %s", payload)
|
logger.info("Received import analysis callback: %s", payload)
|
||||||
|
|||||||
@ -232,6 +232,15 @@ class TableProfilingJobRequest(BaseModel):
|
|||||||
None,
|
None,
|
||||||
description="Miscellaneous execution flags applied across pipeline steps.",
|
description="Miscellaneous execution flags applied across pipeline steps.",
|
||||||
)
|
)
|
||||||
|
workspace_id: Optional[int] = Field(
|
||||||
|
None,
|
||||||
|
ge=0,
|
||||||
|
description="Optional workspace identifier forwarded to snippet_alias callback for RAG ingestion.",
|
||||||
|
)
|
||||||
|
rag_item_type: Optional[str] = Field(
|
||||||
|
"SNIPPET",
|
||||||
|
description="Optional RAG item type forwarded to snippet_alias callback.",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TableProfilingJobAck(BaseModel):
|
class TableProfilingJobAck(BaseModel):
|
||||||
@ -247,7 +256,7 @@ class TableSnippetUpsertRequest(BaseModel):
|
|||||||
ge=0,
|
ge=0,
|
||||||
description="Version timestamp aligned with the pipeline (yyyyMMddHHmmss as integer).",
|
description="Version timestamp aligned with the pipeline (yyyyMMddHHmmss as integer).",
|
||||||
)
|
)
|
||||||
rag_workspace_id: Optional[int] = Field(
|
workspace_id: Optional[int] = Field(
|
||||||
None,
|
None,
|
||||||
ge=0,
|
ge=0,
|
||||||
description="Optional workspace identifier for RAG ingestion; when provided and action_type=snippet_alias "
|
description="Optional workspace identifier for RAG ingestion; when provided and action_type=snippet_alias "
|
||||||
@ -329,6 +338,24 @@ class TableSnippetUpsertRequest(BaseModel):
|
|||||||
ge=0,
|
ge=0,
|
||||||
description="Optional execution duration in milliseconds.",
|
description="Optional execution duration in milliseconds.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TableSnippetRagIngestRequest(BaseModel):
    # Request body naming the table version whose snippets should be pushed
    # to RAG, plus the target workspace and the item type to register.

    # Table whose snippet results are ingested; must be >= 1.
    table_id: int = Field(
        ...,
        ge=1,
        description="Unique identifier for the table.",
    )
    # Pipeline version of the snippet results to ingest.
    version_ts: int = Field(
        ...,
        ge=0,
        description="Version timestamp aligned with the pipeline (yyyyMMddHHmmss as integer).",
    )
    # Required here: there is no default workspace.
    workspace_id: int = Field(
        ...,
        ge=0,
        description="Workspace id used when pushing snippets to RAG.",
    )
    # May be sent as null; the default value is "SNIPPET".
    rag_item_type: Optional[str] = Field(
        "SNIPPET",
        description="Optional RAG item type used when pushing snippets to RAG. Defaults to 'SNIPPET'.",
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TableSnippetRagIngestResponse(BaseModel):
|
||||||
|
rag_item_ids: List[int] = Field(..., description="List of ingested rag_item_ids.")
|
||||||
result_checksum: Optional[str] = Field(
|
result_checksum: Optional[str] = Field(
|
||||||
None,
|
None,
|
||||||
description="Optional checksum for the result payload (e.g., MD5).",
|
description="Optional checksum for the result payload (e.g., MD5).",
|
||||||
|
|||||||
@ -24,7 +24,6 @@ from app.services import LLMGateway
|
|||||||
from app.settings import DEFAULT_IMPORT_MODEL
|
from app.settings import DEFAULT_IMPORT_MODEL
|
||||||
from app.services.import_analysis import (
|
from app.services.import_analysis import (
|
||||||
IMPORT_GATEWAY_BASE_URL,
|
IMPORT_GATEWAY_BASE_URL,
|
||||||
build_import_gateway_headers,
|
|
||||||
resolve_provider_from_model,
|
resolve_provider_from_model,
|
||||||
)
|
)
|
||||||
from app.utils.llm_usage import extract_usage as extract_llm_usage
|
from app.utils.llm_usage import extract_usage as extract_llm_usage
|
||||||
@ -533,7 +532,6 @@ async def _call_chat_completions(
|
|||||||
temperature: float = 0.2,
|
temperature: float = 0.2,
|
||||||
timeout_seconds: Optional[float] = None,
|
timeout_seconds: Optional[float] = None,
|
||||||
) -> Any:
|
) -> Any:
|
||||||
# Normalize model spec to provider+model and issue the unified chat call.
|
|
||||||
provider, model_name = resolve_provider_from_model(model_spec)
|
provider, model_name = resolve_provider_from_model(model_spec)
|
||||||
payload = {
|
payload = {
|
||||||
"provider": provider.value,
|
"provider": provider.value,
|
||||||
@ -547,17 +545,16 @@ async def _call_chat_completions(
|
|||||||
payload_size_bytes = len(json.dumps(payload, ensure_ascii=False).encode("utf-8"))
|
payload_size_bytes = len(json.dumps(payload, ensure_ascii=False).encode("utf-8"))
|
||||||
|
|
||||||
url = f"{IMPORT_GATEWAY_BASE_URL.rstrip('/')}/v1/chat/completions"
|
url = f"{IMPORT_GATEWAY_BASE_URL.rstrip('/')}/v1/chat/completions"
|
||||||
headers = build_import_gateway_headers()
|
|
||||||
try:
|
try:
|
||||||
|
# Log the full request, including the payload body.
|
||||||
logger.info(
|
logger.info(
|
||||||
"Calling chat completions API %s with model=%s payload_size=%sB",
|
"Calling chat completions API %s with model %s and size %s and payload %s",
|
||||||
url,
|
url,
|
||||||
model_name,
|
model_name,
|
||||||
payload_size_bytes,
|
payload_size_bytes,
|
||||||
|
payload,
|
||||||
)
|
)
|
||||||
response = await client.post(
|
response = await client.post(url, json=payload, timeout=timeout_seconds)
|
||||||
url, json=payload, timeout=timeout_seconds, headers=headers
|
|
||||||
)
|
|
||||||
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
except httpx.HTTPError as exc:
|
except httpx.HTTPError as exc:
|
||||||
@ -706,7 +703,6 @@ async def _run_action_with_callback(
|
|||||||
input_payload: Any = None,
|
input_payload: Any = None,
|
||||||
model_spec: Optional[str] = None,
|
model_spec: Optional[str] = None,
|
||||||
) -> Any:
|
) -> Any:
|
||||||
# Execute a pipeline action and always emit a callback capturing success/failure.
|
|
||||||
if input_payload is not None:
|
if input_payload is not None:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Pipeline action %s input: %s",
|
"Pipeline action %s input: %s",
|
||||||
@ -789,6 +785,8 @@ async def process_table_profiling_job(
|
|||||||
"table_schema_version_id": request.table_schema_version_id,
|
"table_schema_version_id": request.table_schema_version_id,
|
||||||
"llm_model": request.llm_model,
|
"llm_model": request.llm_model,
|
||||||
"llm_timeout_seconds": timeout_seconds,
|
"llm_timeout_seconds": timeout_seconds,
|
||||||
|
"workspace_id": request.workspace_id,
|
||||||
|
"rag_item_type": request.rag_item_type,
|
||||||
}
|
}
|
||||||
|
|
||||||
logging_request_payload = _profiling_request_for_log(request)
|
logging_request_payload = _profiling_request_for_log(request)
|
||||||
|
|||||||
@ -459,6 +459,18 @@ def _stable_rag_item_id(table_id: int, version_ts: int, snippet_id: str) -> int:
|
|||||||
return int(digest[:16], 16) % 9_000_000_000_000_000_000
|
return int(digest[:16], 16) % 9_000_000_000_000_000_000
|
||||||
|
|
||||||
|
|
||||||
|
def _to_serializable(value: Any) -> Any:
|
||||||
|
if value is None or isinstance(value, (str, int, float, bool)):
|
||||||
|
return value
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
return value.isoformat()
|
||||||
|
if isinstance(value, dict):
|
||||||
|
return {k: _to_serializable(v) for k, v in value.items()}
|
||||||
|
if isinstance(value, list):
|
||||||
|
return [_to_serializable(v) for v in value]
|
||||||
|
return str(value)
|
||||||
|
|
||||||
|
|
||||||
def _build_rag_text(snippet: Dict[str, Any]) -> str:
|
def _build_rag_text(snippet: Dict[str, Any]) -> str:
|
||||||
# Deterministic text concatenation for embedding input.
|
# Deterministic text concatenation for embedding input.
|
||||||
parts: List[str] = []
|
parts: List[str] = []
|
||||||
@ -512,7 +524,8 @@ def _prepare_rag_payloads(
|
|||||||
continue
|
continue
|
||||||
rag_item_id = _stable_rag_item_id(table_id, version_ts, snippet_id)
|
rag_item_id = _stable_rag_item_id(table_id, version_ts, snippet_id)
|
||||||
rag_text = _build_rag_text(snippet)
|
rag_text = _build_rag_text(snippet)
|
||||||
merged_json = json.dumps(snippet, ensure_ascii=False)
|
serializable_snippet = _to_serializable(snippet)
|
||||||
|
merged_json = json.dumps(serializable_snippet, ensure_ascii=False)
|
||||||
updated_at_raw = snippet.get("updated_at_from_action") or now
|
updated_at_raw = snippet.get("updated_at_from_action") or now
|
||||||
if isinstance(updated_at_raw, str):
|
if isinstance(updated_at_raw, str):
|
||||||
try:
|
try:
|
||||||
|
|||||||
@ -23,7 +23,7 @@ PROVIDER_KEY_ENV_MAP: Dict[str, str] = {
|
|||||||
DEFAULT_IMPORT_MODEL = os.getenv("DEFAULT_IMPORT_MODEL", "deepseek:deepseek-chat")
|
DEFAULT_IMPORT_MODEL = os.getenv("DEFAULT_IMPORT_MODEL", "deepseek:deepseek-chat")
|
||||||
NEW_API_BASE_URL = os.getenv("NEW_API_BASE_URL")
|
NEW_API_BASE_URL = os.getenv("NEW_API_BASE_URL")
|
||||||
NEW_API_AUTH_TOKEN = os.getenv("NEW_API_AUTH_TOKEN")
|
NEW_API_AUTH_TOKEN = os.getenv("NEW_API_AUTH_TOKEN")
|
||||||
RAG_API_BASE_URL = os.getenv("RAG_API_BASE_URL", "http://127.0.0.1:8000")
|
RAG_API_BASE_URL = os.getenv("RAG_API_BASE_URL", "https://tchatbi.agentcarrier.cn/chatbi/api")
|
||||||
RAG_API_AUTH_TOKEN = os.getenv("RAG_API_AUTH_TOKEN")
|
RAG_API_AUTH_TOKEN = os.getenv("RAG_API_AUTH_TOKEN")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -4,4 +4,18 @@ version = "0.1.0"
|
|||||||
description = "Add your description here"
|
description = "Add your description here"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
dependencies = []
|
dependencies = [
|
||||||
|
"fastapi>=0.111.0",
|
||||||
|
"uvicorn[standard]>=0.29.0",
|
||||||
|
"pydantic>=2.6.0",
|
||||||
|
"sqlalchemy>=2.0.28",
|
||||||
|
"pymysql>=1.1.0",
|
||||||
|
"great-expectations[profilers]==0.18.19",
|
||||||
|
"pandas>=2.0",
|
||||||
|
"numpy>=1.24",
|
||||||
|
"openpyxl>=3.1",
|
||||||
|
"httpx==0.27.2",
|
||||||
|
"python-dotenv==1.0.1",
|
||||||
|
"requests>=2.31.0",
|
||||||
|
"PyYAML>=6.0.1",
|
||||||
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user