from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import os
import logging
from openai import OpenAI

# Read the NVIDIA API key from environment variables
api_key = os.getenv("NVIDIA_API_KEY")
if api_key is None:
    raise ValueError("NVIDIA API key not found in environment variables. Please set NVIDIA_API_KEY.")

# Initialize FastAPI app
app = FastAPI()

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NVIDIA API configuration: the NVIDIA endpoint is OpenAI-compatible,
# so the OpenAI client is simply pointed at integrate.api.nvidia.com
client = OpenAI(
    api_key=api_key,
    base_url="https://integrate.api.nvidia.com/v1",
)


# Define request body schema
class TextGenerationRequest(BaseModel):
    prompt: str
    max_new_tokens: int = 1024
    temperature: float = 0.4
    top_p: float = 0.7
    stream: bool = True


# Define API endpoint
@app.post("/generate-text")
async def generate_text(request: TextGenerationRequest):
    try:
        logger.info("Generating text...")

        # Generate response from the NVIDIA API
        response = client.chat.completions.create(
            model="meta/llama-3.1-405b-instruct",
            messages=[{"role": "user", "content": request.prompt}],
            temperature=request.temperature,
            top_p=request.top_p,
            max_tokens=request.max_new_tokens,
            stream=request.stream,
        )

        response_text = ""
        if request.stream:
            # Accumulate the streamed delta chunks into a single string
            for chunk in response:
                if chunk.choices[0].delta.content is not None:
                    response_text += chunk.choices[0].delta.content
        else:
            response_text = response.choices[0].message.content

        return {"generated_text": response_text}
    except Exception as e:
        logger.error(f"Error generating text: {e}")
        raise HTTPException(status_code=500, detail=str(e))


# Add a root endpoint for health checks
@app.get("/")
async def root():
    return {"message": "Welcome Fred808 GPT"}


# Add a test endpoint
@app.get("/test")
async def test():
    return {"message": "API is running!"}
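

# Local entry point (a minimal sketch, not part of the original service): assumes this
# file is saved as main.py and that uvicorn is installed. It is equivalent to running
# `uvicorn main:app --host 0.0.0.0 --port 8000` from the command line. The
# /generate-text endpoint can then be exercised with a request such as (hypothetical
# payload values):
#   curl -X POST http://localhost:8000/generate-text \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Write a haiku about GPUs.", "stream": false}'
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)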