# app.py
import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import InferenceClient
from typing import Optional

# Initialize FastAPI app
app = FastAPI(
    title="LLM Chat API",
    description="API for getting chat responses from Llama model",
    version="1.0.0"
)

# Request/response schemas
class ChatRequest(BaseModel):
    text: str

class ChatResponse(BaseModel):
    response: str

def llm_chat_response(text: str) -> str:
    try:
        # The Hugging Face token must be available as an environment variable (Space secret)
        HF_TOKEN = os.getenv("HF_TOKEN")
        if not HF_TOKEN:
            raise HTTPException(status_code=500, detail="HF_TOKEN not configured")

        client = InferenceClient(api_key=HF_TOKEN)
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": text + " describe in one line only"
                    }
                ]
            }
        ]
        response_from_llama = client.chat.completions.create(
            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
            messages=messages,
            max_tokens=500
        )
        return response_from_llama.choices[0].message.content
    except HTTPException:
        # Propagate HTTP errors (e.g. missing token) unchanged
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    try:
        response = llm_chat_response(request.text)
        return ChatResponse(response=response)
    except HTTPException as he:
        raise he
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/")
async def root():
    return {"message": "Welcome to the LLM Chat API. Use POST /chat endpoint to get responses."}