# app.py
import os
import logging
import base64
import requests
from typing import Optional

from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from huggingface_hub import InferenceClient
from requests.exceptions import HTTPError

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI(
    title="LLM Chat API",
    description="API for getting chat responses from Llama model (supports text and image input)",
    version="1.0.0"
)

class ChatRequest(BaseModel):
    text: str
    image_url: Optional[str] = None

class ChatResponse(BaseModel):
    response: str
    status: str

def llm_chat_response(text: str, image_url: Optional[str] = None) -> str:
    try:
        HF_TOKEN = os.getenv("HF_TOKEN")
        logger.info("Checking HF_TOKEN...")
        if not HF_TOKEN:
            logger.error("HF_TOKEN not found in environment variables")
            raise HTTPException(status_code=500, detail="HF_TOKEN not configured")

        logger.info("Initializing InferenceClient...")
        client = InferenceClient(
            provider="sambanova",
            api_key=HF_TOKEN
        )

        # Build the messages payload.
        # For text-only queries, append a default instruction.
        message_content = [{
            "type": "text",
            "text": text + ("" if image_url else " describe in one line only")
        }]

        if image_url:
            logger.info("Downloading and converting image to base64 data URI...")
            # Download the image
            image_response = requests.get(image_url, timeout=30)
            if image_response.status_code != 200:
                logger.error("Failed to download image from URL")
                raise HTTPException(status_code=500, detail="Failed to download image from provided URL")
            image_bytes = image_response.content

            # Get the MIME type from the response headers
            mime_type = image_response.headers.get("Content-Type")
            if not mime_type or not mime_type.startswith("image/"):
                logger.error("Invalid image MIME type")
                raise HTTPException(status_code=500, detail="Invalid image MIME type")

            # Encode the image in base64 and format it as a data URI
            base64_image = base64.b64encode(image_bytes).decode("utf-8")
            data_uri = f"data:{mime_type};base64,{base64_image}"
            logger.info(f"Data URI created: {data_uri[:50]}...")  # log first 50 chars for verification

            message_content.append({
                "type": "image_url",
                "image_url": {"url": data_uri}
            })

        messages = [{
            "role": "user",
            "content": message_content
        }]

        logger.info("Sending request to model...")
        try:
            completion = client.chat.completions.create(
                model="meta-llama/Llama-3.2-11B-Vision-Instruct",
                messages=messages,
                max_tokens=500
            )
        except HTTPError as http_err:
            # Log HTTP errors from the request
            logger.error(f"HTTP error occurred: {http_err.response.text}")
            raise HTTPException(status_code=500, detail=http_err.response.text)

        logger.info(f"Raw model response: {completion}")

        # If the model returned an error field, capture and return that error.
        if getattr(completion, "error", None):
            error_details = completion.error
            error_message = error_details.get("message", "Unknown error")
            logger.error(f"Model returned error: {error_message}")
            raise HTTPException(status_code=500, detail=f"Model returned error: {error_message}")

        if not completion.choices:
            logger.error("No choices returned from model.")
            raise HTTPException(status_code=500, detail="Model returned no choices.")

        # Extract the response message from the first choice.
        choice = completion.choices[0]
        response_message = None
        if hasattr(choice, "message"):
            response_message = choice.message
        elif isinstance(choice, dict):
            response_message = choice.get("message")
        if not response_message:
            logger.error(f"Response message is empty: {choice}")
            raise HTTPException(status_code=500, detail="Model response did not include a message.")

        # The message may be an object with a .content attribute or a plain dict.
        content = None
        if isinstance(response_message, dict):
            content = response_message.get("content")
        if content is None and hasattr(response_message, "content"):
            content = response_message.content
        if not content:
            logger.error(f"Message content is missing: {response_message}")
            raise HTTPException(status_code=500, detail="Model message did not include content.")

        return content

    except HTTPException:
        # Re-raise HTTPExceptions unchanged so their status codes and details are preserved.
        raise
    except Exception as e:
        logger.error(f"Error in llm_chat_response: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    try:
        logger.info(f"Received chat request with text: {request.text}")
        if request.image_url:
            logger.info(f"Image URL provided: {request.image_url}")
        response = llm_chat_response(request.text, request.image_url)
        return ChatResponse(response=response, status="success")
    except HTTPException as he:
        logger.error(f"HTTP Exception in chat endpoint: {str(he)}")
        raise he
    except Exception as e:
        logger.error(f"Unexpected error in chat endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/")
async def root():
    return {"message": "Welcome to the LLM Chat API. Use POST /chat endpoint with 'text' and optionally 'image_url' for queries."}

@app.exception_handler(404)
async def not_found_handler(request, exc):
    return JSONResponse(
        status_code=404,
        content={"error": "Endpoint not found. Please use POST /chat for queries."}
    )

@app.exception_handler(405)
async def method_not_allowed_handler(request, exc):
    return JSONResponse(
        status_code=405,
        content={"error": "Method not allowed. Please check the API documentation."}
    )
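# Example client call (a minimal sketch, not part of the app itself). It assumes the
# server is running locally via `uvicorn app:app --port 8000` with HF_TOKEN set in that
# environment; the image URL below is hypothetical, and "image_url" may be omitted for
# text-only requests.
#
#   import requests
#
#   payload = {
#       "text": "What is in this picture?",
#       "image_url": "https://example.com/sample.jpg",
#   }
#   resp = requests.post("http://localhost:8000/chat", json=payload, timeout=120)
#   print(resp.json())  # e.g. {"response": "...", "status": "success"}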