# app.py
import os

from fastapi import FastAPI, HTTPException
from huggingface_hub import InferenceClient
from pydantic import BaseModel

# Initialize the FastAPI app
app = FastAPI(
    title="LLM Chat API",
    description="API for getting chat responses from a Llama model",
    version="1.0.0"
)


class ChatRequest(BaseModel):
    text: str


class ChatResponse(BaseModel):
    response: str


def llm_chat_response(text: str) -> str:
    """Send the user's text to the Llama model and return its reply."""
    try:
        HF_TOKEN = os.getenv("HF_TOKEN")
        if not HF_TOKEN:
            raise HTTPException(status_code=500, detail="HF_TOKEN not configured")

        client = InferenceClient(api_key=HF_TOKEN)

        # Build a chat message, appending a brief-answer instruction to the user's text.
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": text + " describe in one line only"
                    }
                ]
            }
        ]

        response_from_llama = client.chat.completions.create(
            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
            messages=messages,
            max_tokens=500
        )
        return response_from_llama.choices[0].message.content
    except HTTPException:
        # Re-raise HTTP errors (e.g. the missing-token case above) without wrapping them again.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    try:
        response = llm_chat_response(request.text)
        return ChatResponse(response=response)
    except HTTPException as he:
        raise he
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/")
async def root():
    return {"message": "Welcome to the LLM Chat API. Use POST /chat endpoint to get responses."}
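

# --- Local run sketch ---
# A minimal way to serve this app for local testing; it assumes uvicorn is installed
# alongside fastapi, and the host/port values are illustrative, not part of the original app.
# Example request once the server is up:
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"text": "What is FastAPI?"}'
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)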