数据知识回调入库
This commit is contained in:
@ -23,6 +23,7 @@ from app.models import (
|
||||
LLMRole,
|
||||
)
|
||||
from app.settings import DEFAULT_IMPORT_MODEL, get_supported_import_models
|
||||
from app.utils.llm_usage import extract_usage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -375,18 +376,6 @@ async def dispatch_import_analysis_job(
|
||||
|
||||
return result
|
||||
|
||||
# Normalize token-usage fields across models that spell the keys differently.
def extract_usage(resp_json: dict) -> dict:
    """Return token usage from an LLM response under a single set of keys.

    The usage block is read from ``resp_json["usage"]`` or, failing that,
    ``resp_json["usageMetadata"]``. Each count is looked up under several
    alternative key spellings:

    * prompt: ``prompt_tokens``, ``input_tokens``, ``promptTokenCount``
    * completion: ``completion_tokens``, ``output_tokens``,
      ``candidatesTokenCount``
    * total: ``total_tokens``, ``totalTokenCount``

    Args:
        resp_json: Decoded JSON body of the model response.

    Returns:
        A dict with the keys ``prompt_tokens``, ``completion_tokens`` and
        ``total_tokens``. The first two are ``None`` when the response does
        not report them. ``total_tokens`` falls back to the sum of the
        resolved prompt and completion counts (0 when neither is known).
    """
    usage = {}
    if isinstance(resp_json, dict):
        usage = resp_json.get("usage") or resp_json.get("usageMetadata") or {}
    if not isinstance(usage, dict):
        # A malformed usage block must not break the caller; treat it as absent.
        usage = {}

    def _first_present(*keys):
        # Test against None rather than truthiness so that a legitimate
        # count of 0 is reported as 0 instead of being discarded.
        for key in keys:
            value = usage.get(key)
            if value is not None:
                return value
        return None

    prompt_tokens = _first_present(
        "prompt_tokens", "input_tokens", "promptTokenCount"
    )
    completion_tokens = _first_present(
        "completion_tokens", "output_tokens", "candidatesTokenCount"
    )

    # Sum the *resolved* counts so that every key spelling contributes to the
    # fallback, not only the prompt_tokens/input_tokens family.
    total_tokens = (
        usage.get("total_tokens")
        or usage.get("totalTokenCount")
        or ((prompt_tokens or 0) + (completion_tokens or 0))
    )

    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": total_tokens,
    }
|
||||
|
||||
async def notify_import_analysis_callback(
|
||||
callback_url: str,
|
||||
payload: Dict[str, Any],
|
||||
|
||||
@ -26,6 +26,7 @@ from app.services.import_analysis import (
|
||||
IMPORT_GATEWAY_BASE_URL,
|
||||
resolve_provider_from_model,
|
||||
)
|
||||
from app.utils.llm_usage import extract_usage as extract_llm_usage
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -37,7 +38,7 @@ PROMPT_FILENAMES = {
|
||||
"snippet_generator": "snippet_generator.md",
|
||||
"snippet_alias": "snippet_alias_generator.md",
|
||||
}
|
||||
DEFAULT_CHAT_TIMEOUT_SECONDS = 90.0
|
||||
DEFAULT_CHAT_TIMEOUT_SECONDS = 180.0
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -47,6 +48,12 @@ class GEProfilingArtifacts:
|
||||
docs_path: str
|
||||
|
||||
|
||||
@dataclass
class LLMCallResult:
    """Pair the parsed payload of an LLM call with its token-usage info.

    ``data`` holds the parsed completion payload; ``usage`` holds the
    usage mapping extracted from the same response, or ``None`` when no
    usage information is attached.
    """

    # Parsed completion payload (callers check it for dict or list shape).
    data: Any
    # Token-usage mapping for the call; optional.
    usage: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class PipelineActionType:
|
||||
GE_PROFILING = "ge_profiling"
|
||||
GE_RESULT_DESC = "ge_result_desc"
|
||||
@ -124,11 +131,16 @@ def _extract_json_payload(content: str) -> str:
|
||||
if not stripped:
|
||||
raise ValueError("Empty LLM content.")
|
||||
|
||||
for opener, closer in (("{", "}"), ("[", "]")):
|
||||
start = stripped.find(opener)
|
||||
end = stripped.rfind(closer)
|
||||
if start != -1 and end != -1 and end > start:
|
||||
candidate = stripped[start : end + 1].strip()
|
||||
decoder = json.JSONDecoder()
|
||||
for idx, char in enumerate(stripped):
|
||||
if char not in {"{", "["}:
|
||||
continue
|
||||
try:
|
||||
_, end = decoder.raw_decode(stripped[idx:])
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
candidate = stripped[idx : idx + end].strip()
|
||||
if candidate:
|
||||
return candidate
|
||||
|
||||
return stripped
|
||||
@ -559,7 +571,9 @@ async def _call_chat_completions(
|
||||
except ValueError as exc:
|
||||
raise ProviderAPICallError("Chat completions response was not valid JSON.") from exc
|
||||
|
||||
return _parse_completion_payload(response_payload)
|
||||
parsed_payload = _parse_completion_payload(response_payload)
|
||||
usage_info = extract_llm_usage(response_payload)
|
||||
return LLMCallResult(data=parsed_payload, usage=usage_info)
|
||||
|
||||
|
||||
def _normalize_for_json(value: Any) -> Any:
|
||||
@ -628,7 +642,7 @@ async def _execute_result_desc(
|
||||
client=client,
|
||||
timeout_seconds=timeout_seconds,
|
||||
)
|
||||
if not isinstance(llm_output, dict):
|
||||
if not isinstance(llm_output.data, dict):
|
||||
raise ProviderAPICallError("GE result description payload must be a JSON object.")
|
||||
return llm_output
|
||||
|
||||
@ -651,7 +665,7 @@ async def _execute_snippet_generation(
|
||||
client=client,
|
||||
timeout_seconds=timeout_seconds,
|
||||
)
|
||||
if not isinstance(llm_output, list):
|
||||
if not isinstance(llm_output.data, list):
|
||||
raise ProviderAPICallError("Snippet generator must return a JSON array.")
|
||||
return llm_output
|
||||
|
||||
@ -674,7 +688,7 @@ async def _execute_snippet_alias(
|
||||
client=client,
|
||||
timeout_seconds=timeout_seconds,
|
||||
)
|
||||
if not isinstance(llm_output, list):
|
||||
if not isinstance(llm_output.data, list):
|
||||
raise ProviderAPICallError("Snippet alias generator must return a JSON array.")
|
||||
return llm_output
|
||||
|
||||
@ -711,6 +725,12 @@ async def _run_action_with_callback(
|
||||
await _post_callback(callback_url, failure_payload, client)
|
||||
raise
|
||||
|
||||
usage_info: Optional[Dict[str, Any]] = None
|
||||
result_payload = result
|
||||
if isinstance(result, LLMCallResult):
|
||||
usage_info = result.usage
|
||||
result_payload = result.data
|
||||
|
||||
success_payload = dict(callback_base)
|
||||
success_payload.update(
|
||||
{
|
||||
@ -724,23 +744,26 @@ async def _run_action_with_callback(
|
||||
logger.info(
|
||||
"Pipeline action %s output: %s",
|
||||
action_type,
|
||||
_preview_for_log(result),
|
||||
_preview_for_log(result_payload),
|
||||
)
|
||||
|
||||
if action_type == PipelineActionType.GE_PROFILING:
|
||||
artifacts: GEProfilingArtifacts = result
|
||||
success_payload["profiling_json"] = artifacts.profiling_result
|
||||
success_payload["profiling_summary"] = artifacts.profiling_summary
|
||||
artifacts: GEProfilingArtifacts = result_payload
|
||||
success_payload["ge_profiling_json"] = artifacts.profiling_result
|
||||
success_payload["ge_profiling_summary"] = artifacts.profiling_summary
|
||||
success_payload["ge_report_path"] = artifacts.docs_path
|
||||
elif action_type == PipelineActionType.GE_RESULT_DESC:
|
||||
success_payload["table_desc_json"] = result
|
||||
success_payload["ge_result_desc_json"] = result_payload
|
||||
elif action_type == PipelineActionType.SNIPPET:
|
||||
success_payload["snippet_json"] = result
|
||||
success_payload["snippet_json"] = result_payload
|
||||
elif action_type == PipelineActionType.SNIPPET_ALIAS:
|
||||
success_payload["snippet_alias_json"] = result
|
||||
success_payload["snippet_alias_json"] = result_payload
|
||||
|
||||
if usage_info:
|
||||
success_payload["llm_usage"] = usage_info
|
||||
|
||||
await _post_callback(callback_url, success_payload, client)
|
||||
return result
|
||||
return result_payload
|
||||
|
||||
|
||||
async def process_table_profiling_job(
|
||||
|
||||
@ -55,6 +55,28 @@ def _collect_common_columns(request: TableSnippetUpsertRequest) -> Dict[str, Any
|
||||
"table_schema": _prepare_table_schema(request.table_schema),
|
||||
}
|
||||
|
||||
payload.update(
|
||||
{
|
||||
"ge_profiling_json": None,
|
||||
"ge_profiling_json_size_bytes": None,
|
||||
"ge_profiling_summary": None,
|
||||
"ge_profiling_summary_size_bytes": None,
|
||||
"ge_profiling_total_size_bytes": None,
|
||||
"ge_profiling_html_report_url": None,
|
||||
"ge_result_desc_json": None,
|
||||
"ge_result_desc_json_size_bytes": None,
|
||||
"snippet_json": None,
|
||||
"snippet_json_size_bytes": None,
|
||||
"snippet_alias_json": None,
|
||||
"snippet_alias_json_size_bytes": None,
|
||||
}
|
||||
)
|
||||
|
||||
if request.llm_usage is not None:
|
||||
llm_usage_json, _ = _serialize_json(request.llm_usage)
|
||||
if llm_usage_json is not None:
|
||||
payload["llm_usage"] = llm_usage_json
|
||||
|
||||
if request.error_code is not None:
|
||||
logger.debug("Adding error_code: %s", request.error_code)
|
||||
payload["error_code"] = request.error_code
|
||||
@ -80,35 +102,35 @@ def _apply_action_payload(
|
||||
) -> None:
|
||||
logger.debug("Applying action-specific payload for action_type=%s", request.action_type)
|
||||
if request.action_type == ActionType.GE_PROFILING:
|
||||
full_json, full_size = _serialize_json(request.result_json)
|
||||
summary_json, summary_size = _serialize_json(request.result_summary_json)
|
||||
full_json, full_size = _serialize_json(request.ge_profiling_json)
|
||||
summary_json, summary_size = _serialize_json(request.ge_profiling_summary)
|
||||
if full_json is not None:
|
||||
payload["ge_profiling_full"] = full_json
|
||||
payload["ge_profiling_full_size_bytes"] = full_size
|
||||
payload["ge_profiling_json"] = full_json
|
||||
payload["ge_profiling_json_size_bytes"] = full_size
|
||||
if summary_json is not None:
|
||||
payload["ge_profiling_summary"] = summary_json
|
||||
payload["ge_profiling_summary_size_bytes"] = summary_size
|
||||
if full_size is not None or summary_size is not None:
|
||||
payload["ge_profiling_total_size_bytes"] = (full_size or 0) + (
|
||||
summary_size or 0
|
||||
)
|
||||
if request.html_report_url:
|
||||
payload["ge_profiling_html_report_url"] = request.html_report_url
|
||||
if request.ge_profiling_total_size_bytes is not None:
|
||||
payload["ge_profiling_total_size_bytes"] = request.ge_profiling_total_size_bytes
|
||||
elif full_size is not None or summary_size is not None:
|
||||
payload["ge_profiling_total_size_bytes"] = (full_size or 0) + (summary_size or 0)
|
||||
if request.ge_profiling_html_report_url:
|
||||
payload["ge_profiling_html_report_url"] = request.ge_profiling_html_report_url
|
||||
elif request.action_type == ActionType.GE_RESULT_DESC:
|
||||
full_json, full_size = _serialize_json(request.result_json)
|
||||
full_json, full_size = _serialize_json(request.ge_result_desc_json)
|
||||
if full_json is not None:
|
||||
payload["ge_result_desc_full"] = full_json
|
||||
payload["ge_result_desc_full_size_bytes"] = full_size
|
||||
payload["ge_result_desc_json"] = full_json
|
||||
payload["ge_result_desc_json_size_bytes"] = full_size
|
||||
elif request.action_type == ActionType.SNIPPET:
|
||||
full_json, full_size = _serialize_json(request.result_json)
|
||||
full_json, full_size = _serialize_json(request.snippet_json)
|
||||
if full_json is not None:
|
||||
payload["snippet_full"] = full_json
|
||||
payload["snippet_full_size_bytes"] = full_size
|
||||
payload["snippet_json"] = full_json
|
||||
payload["snippet_json_size_bytes"] = full_size
|
||||
elif request.action_type == ActionType.SNIPPET_ALIAS:
|
||||
full_json, full_size = _serialize_json(request.result_json)
|
||||
full_json, full_size = _serialize_json(request.snippet_alias_json)
|
||||
if full_json is not None:
|
||||
payload["snippet_alias_full"] = full_json
|
||||
payload["snippet_alias_full_size_bytes"] = full_size
|
||||
payload["snippet_alias_json"] = full_json
|
||||
payload["snippet_alias_json_size_bytes"] = full_size
|
||||
else:
|
||||
logger.error("Unsupported action type encountered: %s", request.action_type)
|
||||
raise ValueError(f"Unsupported action type '{request.action_type}'.")
|
||||
|
||||
Reference in New Issue
Block a user