from __future__ import annotations

import argparse
import asyncio
import json
from typing import Any, Dict

import httpx

DEFAULT_URL = "http://127.0.0.1:8000/v1/chat/completions"
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command-line options for the DeepSeek gateway client.

    Args:
        argv: Explicit argument list to parse. Defaults to ``None``, in which
            case argparse falls back to ``sys.argv[1:]`` exactly as before —
            accepting it makes the parser callable from tests and other code
            without patching ``sys.argv``.

    Returns:
        Namespace with ``url``, ``model``, ``system``, ``prompt``,
        ``temperature``, ``max_tokens``, ``stream`` and ``extra`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Send a DeepSeek chat completion request to the local LLM gateway."
    )
    parser.add_argument(
        "--url",
        default=DEFAULT_URL,
        help=f"Gateway endpoint URL (default: {DEFAULT_URL})",
    )
    parser.add_argument(
        "--model",
        default="deepseek-chat",
        help="DeepSeek model to use (default: deepseek-chat).",
    )
    parser.add_argument(
        "--system",
        default="You are a helpful assistant.",
        help="Optional system prompt.",
    )
    parser.add_argument(
        "--prompt",
        default="写一段简短的中文欢迎词。",
        help="User message content to send.",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.7,
        help="Sampling temperature.",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=512,
        help="Maximum tokens for the response.",
    )
    parser.add_argument(
        "--stream",
        action="store_true",
        help="Enable streaming mode (DeepSeek supports it).",
    )
    parser.add_argument(
        "--extra",
        help="Optional JSON string with extra provider parameters.",
    )
    return parser.parse_args(argv)
async def send_request(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """POST *payload* as JSON to *url* and return the decoded JSON reply.

    Raises:
        httpx.HTTPStatusError: if the gateway answers with a non-2xx status.
        httpx.HTTPError: on transport-level failures (connect, read, ...).
    """
    # 60-second ceiling on every phase of the request (connect/read/write/pool).
    request_timeout = httpx.Timeout(60.0)
    async with httpx.AsyncClient(timeout=request_timeout) as client:
        resp = await client.post(url, json=payload)
        resp.raise_for_status()
        return resp.json()
def build_payload(args: argparse.Namespace) -> Dict[str, Any]:
    """Translate parsed CLI options into the gateway's request body.

    Args:
        args: Namespace produced by the argument parser.

    Returns:
        JSON-serializable dict for the chat-completions endpoint.

    Raises:
        SystemExit: if ``--extra`` was supplied but is not valid JSON.
    """
    body: Dict[str, Any] = {
        "provider": "deepseek",
        "model": args.model,
        "messages": [
            {"role": "system", "content": args.system},
            {"role": "user", "content": args.prompt},
        ],
        "temperature": args.temperature,
        "max_tokens": args.max_tokens,
        "stream": args.stream,
    }

    if args.extra:
        try:
            parsed = json.loads(args.extra)
        except json.JSONDecodeError as exc:
            raise SystemExit(f"Invalid JSON passed to --extra: {exc}") from exc
        # Only attach truthy extras: a valid-but-empty value (e.g. "{}" or
        # "null") is deliberately dropped, matching the original behavior.
        if parsed:
            body["extra_params"] = parsed

    return body
async def main() -> None:
    """CLI entry point: build the request, send it, pretty-print the reply.

    Raises:
        SystemExit: with a human-readable message on any HTTP failure.
    """
    options = parse_args()
    request_body = build_payload(options)

    try:
        reply = await send_request(options.url, request_body)
    except httpx.HTTPStatusError as exc:
        # Surface the gateway's error body alongside the status code.
        detail = exc.response.text
        raise SystemExit(f"Gateway returned {exc.response.status_code}: {detail}") from exc
    except httpx.HTTPError as exc:
        raise SystemExit(f"HTTP error calling gateway: {exc}") from exc

    # ensure_ascii=False keeps non-ASCII (e.g. Chinese) output readable.
    print(json.dumps(reply, ensure_ascii=False, indent=2))
# Script entry point: run the async client under asyncio's event loop.
if __name__ == "__main__":
    asyncio.run(main())