# app.py
import os

from fastapi import FastAPI, HTTPException
from huggingface_hub import InferenceClient
from pydantic import BaseModel

# Initialize the FastAPI app
app = FastAPI(
    title="LLM Chat API",
    description="API for getting chat responses from a Llama model",
    version="1.0.0"
)


class ChatRequest(BaseModel):
    text: str


class ChatResponse(BaseModel):
    response: str


def llm_chat_response(text: str) -> str:
    """Send the user's text to the Llama model and return its reply."""
    try:
        HF_TOKEN = os.getenv("HF_TOKEN")
        if not HF_TOKEN:
            raise HTTPException(status_code=500, detail="HF_TOKEN not configured")

        client = InferenceClient(api_key=HF_TOKEN)

        # Build a chat message, appending a brief-answer instruction to the user's text.
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": text + " describe in one line only"
                    }
                ]
            }
        ]

        response_from_llama = client.chat.completions.create(
            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
            messages=messages,
            max_tokens=500
        )
        return response_from_llama.choices[0].message.content
    except HTTPException:
        # Re-raise HTTP errors (e.g. the missing-token case above) without wrapping them again.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    try:
        response = llm_chat_response(request.text)
        return ChatResponse(response=response)
    except HTTPException as he:
        raise he
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/")
async def root():
    return {"message": "Welcome to the LLM Chat API. Use POST /chat endpoint to get responses."}
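

# --- Local run sketch ---
# A minimal way to serve this app for local testing; it assumes uvicorn is installed
# alongside fastapi, and the host/port values are illustrative, not part of the original app.
# Example request once the server is up:
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"text": "What is FastAPI?"}'
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)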