106 lines
2.8 KiB
Python
106 lines
2.8 KiB
Python
import asyncio
|
||
import json
|
||
import urllib.request
|
||
from contextlib import asynccontextmanager
|
||
from typing import Optional
|
||
|
||
from fastapi import FastAPI
|
||
from pydantic import BaseModel
|
||
|
||
from mcp_agent.app import MCPApp
|
||
from mcp_agent.agents.agent import Agent
|
||
from mcp_agent.workflows.llm.augmented_llm_openai import OpenAIAugmentedLLM
|
||
|
||
|
||
# Local Ollama model name, served through Ollama's OpenAI-compatible API.
OLLAMA_MODEL = "qwen3:1.7b"
# How long Ollama keeps the model resident in memory after each request
# (passed as "keep_alive" to /api/generate during warm-up).
OLLAMA_KEEP_ALIVE = "10h"
|
||
|
||
# System prompt for the weather agent. The fragments below are joined by
# implicit string concatenation, so each one ends with a trailing space —
# the original was missing separators, producing run-together sentences
# ("...инструменты.ОТВЕЧАЙ..." and "...ЯЗЫКЕ!Не...").
AGENT_INSTRUCTION = (
    "Ты ассистент по погоде. "
    "Если нужны актуальные данные — используй доступные инструменты. "
    "ОТВЕЧАЙ ТОЛЬКО НА РУССКОМ ЯЗЫКЕ! "
    "Не выводи блоки <think> или внутренние рассуждения, отвечай только итогом."
)
|
||
|
||
MCP_SERVER_NAMES = ["weather"]
|
||
|
||
|
||
def warmup_ollama(
    model: str,
    keep_alive: str = "10h",
    base_url: str = "http://localhost:11434",
    timeout: float = 5.0,
) -> None:
    """
    Best-effort warm-up request so Ollama pre-loads *model* into memory.

    Sends an empty prompt with ``keep_alive`` to ``/api/generate``; Ollama
    loads the model and keeps it resident for that duration, so the first
    real request does not pay the cold-load cost. Any network or HTTP
    failure is deliberately swallowed: warm-up is an optimization, never
    a hard requirement.

    Args:
        model: Ollama model name, e.g. ``"qwen3:1.7b"``.
        keep_alive: How long Ollama should keep the model loaded.
        base_url: Base URL of the Ollama server (new, defaults to the
            previously hard-coded localhost endpoint).
        timeout: Request timeout in seconds (new, defaults to the
            previously hard-coded 5 seconds).
    """
    payload = {
        "model": model,
        "prompt": "",
        "stream": False,
        "keep_alive": keep_alive,
    }
    req = urllib.request.Request(
        f"{base_url}/api/generate",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        # Use a context manager so the HTTP response (and its socket) is
        # always closed — the original leaked the response object.
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            resp.read()
    except Exception:
        # Deliberate best-effort: the server may be down or still starting.
        pass
|
||
|
||
|
||
class AskRequest(BaseModel):
    """Request body for ``POST /ask``."""

    # Free-form user question passed verbatim to the LLM.
    question: str
|
||
|
||
|
||
class AskResponse(BaseModel):
    """Response body for ``POST /ask``."""

    # Final answer text produced by the LLM (or a "not ready" message).
    answer: str
|
||
|
||
|
||
# Application-wide singletons. The agent and LLM are created exactly once
# during FastAPI lifespan startup and remain None until startup completes.
app_mcp = MCPApp(name="weather_agent")

_agent: Optional[Agent] = None
_llm: Optional[OpenAIAugmentedLLM] = None

# Serializes calls into the shared LLM — NOTE(review): presumably to avoid
# concurrent requests hitting one local-model session; confirm whether the
# LLM client is safe for concurrent use before removing.
_lock: asyncio.Lock = asyncio.Lock()
|
||
|
||
|
||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    FastAPI lifespan context manager.

    Startup:
    - warm up Ollama (best-effort; failures ignored inside warmup_ollama)
    - start the MCPApp
    - create the Agent and attach the LLM exactly once

    Shutdown:
    - cleanly close the agent and the MCPApp; the nesting order of the
      ``async with`` blocks guarantees the agent closes before the app.
    """
    global _agent, _llm

    # Pre-load the model so the first real request avoids the cold start.
    warmup_ollama(OLLAMA_MODEL, keep_alive=OLLAMA_KEEP_ALIVE)

    async with app_mcp.run():
        _agent = Agent(
            name="weather",
            instruction=AGENT_INSTRUCTION,
            server_names=MCP_SERVER_NAMES,
        )

        async with _agent:
            # Attach the LLM once; the service handles requests while the
            # lifespan is suspended at this yield.
            _llm = await _agent.attach_llm(OpenAIAugmentedLLM)
            yield
|
||
|
||
api = FastAPI(title="Weather MCP Agent API", lifespan=lifespan)
|
||
|
||
|
||
@api.post("/ask", response_model=AskResponse)
async def ask(req: AskRequest) -> AskResponse:
    """
    Answer a user question; the reply is a short answer in Russian.

    If lifespan startup has not finished attaching the LLM yet, a
    "service not ready" message is returned instead of an error status.
    """
    # Snapshot the module-level LLM so the check and the call see the
    # same object.
    llm = _llm
    if llm is None:
        return AskResponse(answer="Сервис не готов. Попробуйте позже.")

    # One request at a time through the shared LLM.
    async with _lock:
        text = await llm.generate_str(req.question)

    return AskResponse(answer=text)
|
||
|
||
|
||
@api.get("/health")
async def health():
    """Liveness probe: always reports the service as up."""
    payload = {"status": "ok"}
    return payload
|