# app.py
import os
import logging
import base64
import requests
from typing import Optional
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from huggingface_hub import InferenceClient
from requests.exceptions import HTTPError

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI(
    title="LLM Chat API",
    description="API for getting chat responses from Llama model (supports text and image input)",
    version="1.0.0"
)


class ChatRequest(BaseModel):
    text: str
    image_url: Optional[str] = None


class ChatResponse(BaseModel):
    response: str
    status: str
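
# Example request bodies accepted by ChatRequest (illustrative values only,
# not taken from the original file):
#   {"text": "Tell me a joke"}
#   {"text": "What is in this picture?", "image_url": "https://example.com/cat.jpg"}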


def llm_chat_response(text: str, image_url: Optional[str] = None) -> str:
    try:
        HF_TOKEN = os.getenv("HF_TOKEN")
        logger.info("Checking HF_TOKEN...")
        if not HF_TOKEN:
            logger.error("HF_TOKEN not found in environment variables")
            raise HTTPException(status_code=500, detail="HF_TOKEN not configured")

        logger.info("Initializing InferenceClient...")
        client = InferenceClient(
            provider="sambanova",
            api_key=HF_TOKEN
        )

        # Build the messages payload.
        # For text-only queries, append a default instruction.
        message_content = [{
            "type": "text",
            "text": text + ("" if image_url else " describe in one line only")
        }]
        if image_url:
            logger.info("Downloading and converting image to base64 data URI...")
            # Download the image (bounded by a timeout so a stalled remote
            # host cannot hang the request handler indefinitely)
            image_response = requests.get(image_url, timeout=30)
            if image_response.status_code != 200:
                logger.error("Failed to download image from URL")
                # A bad client-supplied URL is a client error, not a server error
                raise HTTPException(status_code=400, detail="Failed to download image from provided URL")
            image_bytes = image_response.content

            # Get the MIME type from the response headers
            mime_type = image_response.headers.get("Content-Type")
            if not mime_type or not mime_type.startswith("image/"):
                logger.error("Invalid image MIME type")
                raise HTTPException(status_code=400, detail="Invalid image MIME type")

            # Encode image in base64 and format as a data URI
            base64_image = base64.b64encode(image_bytes).decode("utf-8")
            data_uri = f"data:{mime_type};base64,{base64_image}"
            logger.info(f"Data URI created: {data_uri[:50]}...")  # log first 50 chars for verification

            message_content.append({
                "type": "image_url",
                "image_url": {"url": data_uri}
            })
        messages = [{
            "role": "user",
            "content": message_content
        }]

        logger.info("Sending request to model...")
        try:
            completion = client.chat.completions.create(
                model="meta-llama/Llama-3.2-11B-Vision-Instruct",
                messages=messages,
                max_tokens=500
            )
        except HTTPError as http_err:
            # Log HTTP errors from the request
            logger.error(f"HTTP error occurred: {http_err.response.text}")
            raise HTTPException(status_code=500, detail=http_err.response.text)
logger.info(f"Raw model response: {completion}") | |
# If the model returned an error field, capture and return that error. | |
if getattr(completion, "error", None): | |
error_details = completion.error | |
error_message = error_details.get("message", "Unknown error") | |
logger.error(f"Model returned error: {error_message}") | |
raise HTTPException(status_code=500, detail=f"Model returned error: {error_message}") | |
if not completion.choices or len(completion.choices) == 0: | |
logger.error("No choices returned from model.") | |
raise HTTPException(status_code=500, detail="Model returned no choices.") | |
# Extract the response message from the first choice. | |
choice = completion.choices[0] | |
response_message = None | |
if hasattr(choice, "message"): | |
response_message = choice.message | |
elif isinstance(choice, dict): | |
response_message = choice.get("message") | |
if not response_message: | |
logger.error(f"Response message is empty: {choice}") | |
raise HTTPException(status_code=500, detail="Model response did not include a message.") | |
content = None | |
if isinstance(response_message, dict): | |
content = response_message.get("content") | |
if content is None and hasattr(response_message, "content"): | |
content = response_message.content | |
if not content: | |
logger.error(f"Message content is missing: {response_message}") | |
raise HTTPException(status_code=500, detail="Model message did not include content.") | |
return content | |
except Exception as e: | |
logger.error(f"Error in llm_chat_response: {str(e)}") | |
raise HTTPException(status_code=500, detail=str(e)) | |


@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    try:
        logger.info(f"Received chat request with text: {request.text}")
        if request.image_url:
            logger.info(f"Image URL provided: {request.image_url}")
        response = llm_chat_response(request.text, request.image_url)
        return ChatResponse(response=response, status="success")
    except HTTPException as he:
        logger.error(f"HTTP Exception in chat endpoint: {str(he)}")
        raise he
    except Exception as e:
        logger.error(f"Unexpected error in chat endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
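
# Example invocation (illustrative; assumes the app is reachable on
# localhost:7860, so adjust host and port to your deployment):
#   curl -X POST http://localhost:7860/chat \
#     -H "Content-Type: application/json" \
#     -d '{"text": "What is in this image?", "image_url": "https://example.com/photo.jpg"}'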


@app.get("/")
async def root():
    return {"message": "Welcome to the LLM Chat API. Use POST /chat endpoint with 'text' and optionally 'image_url' for queries."}


@app.exception_handler(404)
async def not_found_handler(request, exc):
    return JSONResponse(
        status_code=404,
        content={"error": "Endpoint not found. Please use POST /chat for queries."}
    )


@app.exception_handler(405)
async def method_not_allowed_handler(request, exc):
    return JSONResponse(
        status_code=405,
        content={"error": "Method not allowed. Please check the API documentation."}
    )
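

# Minimal local entry point (a sketch, not in the original file). Port 7860 is
# the conventional Hugging Face Spaces port; adjust it for other deployments.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)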