from __future__ import annotations

import logging
from typing import Any, Dict, List

import httpx

from app.exceptions import ProviderAPICallError
from app.models import LLMChoice, LLMMessage, LLMProvider, LLMRequest, LLMResponse
from app.providers.base import LLMProviderClient


logger = logging.getLogger(__name__)


class QwenProvider(LLMProviderClient):
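    """Provider client for Qwen models served through Alibaba Cloud DashScope.

    Maps the app's provider-agnostic LLMRequest onto DashScope's
    text-generation payload and normalises the JSON reply into an LLMResponse.
    """
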
|
|
name = LLMProvider.QWEN.value
|
|
api_key_env = "QWEN_API_KEY"
|
|
base_url = (
|
|
"https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
|
|
)

    async def chat(
        self, request: LLMRequest, client: httpx.AsyncClient
    ) -> LLMResponse:
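        """Send a single chat request to DashScope and normalise the response."""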
        self.ensure_stream_supported(request.stream)

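        # Build optional sampling parameters from the request.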
        parameters = {
            "temperature": request.temperature,
            "top_p": request.top_p,
        }
        if request.max_tokens is not None:
            parameters["max_output_tokens"] = request.max_tokens

        # Strip None values from parameters
        parameters = {k: v for k, v in parameters.items() if v is not None}
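
        # DashScope nests the conversation under an "input" key rather than
        # accepting a top-level "messages" list.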
        payload: Dict[str, Any] = {
            "model": request.model,
            "input": {"messages": [msg.model_dump() for msg in request.messages]},
        }
        if parameters:
            payload["parameters"] = parameters

        # merge_payload (base-class helper) folds any caller-supplied
        # extra_params into the payload.
        payload = self.merge_payload(payload, request.extra_params)

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
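
        # Call DashScope, translating HTTP and transport failures into the
        # app-level ProviderAPICallError.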
        try:
            response = await client.post(self.base_url, json=payload, headers=headers)
            response.raise_for_status()
        except httpx.HTTPStatusError as exc:
            # Upstream answered with a non-2xx status; keep the status code and
            # body so callers can see what DashScope rejected.
            status_code = exc.response.status_code
            body = exc.response.text
            logger.error(
                "Qwen upstream returned %s: %s", status_code, body, exc_info=True
            )
            raise ProviderAPICallError(
                f"Qwen request failed with status {status_code}",
                status_code=status_code,
                response_text=body,
            ) from exc
        except httpx.HTTPError as exc:
            # Transport-level failure (connection, timeout, etc.) with no
            # response to report.
            logger.error("Qwen transport error: %s", exc, exc_info=True)
            raise ProviderAPICallError(f"Qwen request failed: {exc}") from exc

        data: Dict[str, Any] = response.json()
        choices = self._build_choices(data.get("output", {}))

        return LLMResponse(
            provider=LLMProvider.QWEN,
            model=request.model,
            choices=choices,
            raw=data,
        )

    @staticmethod
    def _build_choices(output: Dict[str, Any]) -> List[LLMChoice]:
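        """Normalise DashScope's ``output`` block into a list of LLMChoice.

        The API may return either a ``choices`` list of messages or a bare
        ``text`` field (the shape depends on the requested result format);
        both are handled, and an empty assistant choice is used as a last
        resort so callers always receive at least one entry.
        """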
|
|
choices_payload = output.get("choices", [])
|
|
if not choices_payload and output.get("text"):
|
|
return [
|
|
LLMChoice(
|
|
index=0,
|
|
message=LLMMessage(role="assistant", content=output["text"]),
|
|
)
|
|
]

        choices: List[LLMChoice] = []
        for idx, choice in enumerate(choices_payload):
            message_data = choice.get("message") or {}
            message = LLMMessage(
                role=message_data.get("role", "assistant"),
                content=message_data.get("content", ""),
            )
            choices.append(LLMChoice(index=choice.get("index", idx), message=message))
        if not choices:
            # Defensive fallback: never return an empty choices list.
            choices.append(
                LLMChoice(index=0, message=LLMMessage(role="assistant", content=""))
            )
        return choices