# app.py
import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import InferenceClient
from typing import Optional

# Initialize FastAPI app
app = FastAPI(
    title="LLM Chat API",
    description="API for getting chat responses from Llama model",
    version="1.0.0"
)

# Request/response schemas
class ChatRequest(BaseModel):
    text: str

class ChatResponse(BaseModel):
    response: str

def llm_chat_response(text: str) -> str:
    try:
        # The Hugging Face token must be available as an environment variable (Space secret)
        HF_TOKEN = os.getenv("HF_TOKEN")
        if not HF_TOKEN:
            raise HTTPException(status_code=500, detail="HF_TOKEN not configured")

        client = InferenceClient(api_key=HF_TOKEN)
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": text + " describe in one line only"
                    }
                ]
            }
        ]
        response_from_llama = client.chat.completions.create(
            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
            messages=messages,
            max_tokens=500
        )
        return response_from_llama.choices[0].message.content
    except HTTPException:
        # Propagate HTTP errors (e.g. missing token) unchanged
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    try:
        response = llm_chat_response(request.text)
        return ChatResponse(response=response)
    except HTTPException as he:
        raise he
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/")
async def root():
    return {"message": "Welcome to the LLM Chat API. Use POST /chat endpoint to get responses."}