Spaces:

Fred808
/

808-GPT2

Running

App Files Files Community

Fred808 committed 9 days ago

Commit

7c1d81b

verified ·

1 Parent(s): 36267e8

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -24

app.py CHANGED Viewed

@@ -1,12 +1,17 @@
 import os
-import requests
 import logging
 # Read the NVIDIA API key from environment variables
 api_key = os.getenv("NVIDIA_API_KEY")
 if api_key is None:
     raise ValueError("NVIDIA API key not found in environment variables. Please set the NVIDIA_API_KEY.")
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -18,32 +23,62 @@ headers = {
     "Content-Type": "application/json"
 }
-# Define request payload
-payload = {
-    "model": "meta/llama-3.1-405b-instruct",  # Model for NVIDIA's text generation
-    "messages": [{"role": "user", "content": "Write a limerick about the wonders of GPU computing."}],
-    "temperature": 0.2,
-    "top_p": 0.7,
-    "max_tokens": 1024,
-    "stream": True
-}
-# Call NVIDIA's API for text generation
-try:
-    logger.info("Generating text with NVIDIA API...")
-    response = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, stream=True)
-    if response.status_code == 200:
-        # Stream the response
         response_text = ""
         for chunk in response.iter_lines():
             if chunk:
                 data = chunk.decode("utf-8")
-                # Extract the content from the response (adjust based on actual API response structure)
-                if "content" in data:
-                    response_text += data["choices"][0]["delta"].get("content", "")
-                    print(response_text, end="")  # Print content as it's received
-    else:
-        logger.error(f"Error: {response.status_code} - {response.text}")
-except Exception as e:
-    logger.error(f"Error generating text: {e}")

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
 import os
 import logging
+import requests
 # Read the NVIDIA API key from environment variables
 api_key = os.getenv("NVIDIA_API_KEY")
 if api_key is None:
     raise ValueError("NVIDIA API key not found in environment variables. Please set the NVIDIA_API_KEY.")
+# Initialize FastAPI app
+app = FastAPI()
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
     "Content-Type": "application/json"
 }
+# Define request body schema
+class TextGenerationRequest(BaseModel):
+    """Request body accepted by the POST /generate-text endpoint."""
+    # Text forwarded to the model as the content of a single "user" message.
+    prompt: str
+    # Forwarded to the upstream API as "max_tokens".
+    max_new_tokens: int = 1024
+    # Sampling parameters, forwarded to the upstream API unchanged.
+    temperature: float = 0.4
+    top_p: float = 0.7
+    # Forwarded as the payload's "stream" flag. The HTTP call itself is always
+    # made with stream=True regardless of this value.
+    stream: bool = True
+# Define API endpoint to generate text
+@app.post("/generate-text")
+async def generate_text(request: TextGenerationRequest):
+    """Generate text for ``request.prompt`` via the NVIDIA chat-completions API.
+
+    The upstream response is read line by line, the text fragments found in
+    each chunk are concatenated, and the full text is returned once the
+    response has been consumed.
+
+    Returns:
+        ``{"generated_text": <concatenated content>}``.
+
+    Raises:
+        HTTPException: with the upstream status code when the NVIDIA API
+            answers with a non-200 status, or with status 500 for any other
+            failure (network error, unexpected exception).
+    """
+    # Local import so this block does not depend on the file's import section.
+    import json
+    try:
+        logger.info("Generating text with NVIDIA API...")
+        # Prepare the payload for the NVIDIA API request
+        payload = {
+            "model": "meta/llama-3.1-405b-instruct",  # NVIDIA-specific model
+            "messages": [{"role": "user", "content": request.prompt}],
+            "temperature": request.temperature,
+            "top_p": request.top_p,
+            "max_tokens": request.max_new_tokens,
+            "stream": request.stream
+        }
+        # Send POST request to NVIDIA API (streaming enabled)
+        # NOTE(review): requests.post is a blocking call inside an async
+        # endpoint, so the event loop is stalled for the whole generation.
+        response = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, stream=True)
+        if response.status_code != 200:
+            raise HTTPException(status_code=response.status_code, detail=f"Error: {response.text}")
+        # Process the streaming response
         response_text = ""
         for chunk in response.iter_lines():
             if chunk:
                 data = chunk.decode("utf-8")
+                # Assumes OpenAI-style server-sent events: each line looks like
+                # "data: {json}" and the stream ends with "data: [DONE]".
+                if data.startswith("data:"):
+                    data = data[len("data:"):].strip()
+                if not data or data == "[DONE]":
+                    continue
+                try:
+                    # The decoded line is a str; it must be parsed as JSON
+                    # before its fields can be looked up.
+                    parsed = json.loads(data)
+                    choices = parsed.get("choices") or [{}]
+                    # Streamed chunks carry text under "delta"; a non-streamed
+                    # body (stream=False) is presumed to carry it under
+                    # "message" - TODO confirm against the API reference.
+                    fragment = choices[0].get("delta") or choices[0].get("message") or {}
+                    content = fragment.get("content") or ""
+                    if content:
+                        response_text += content
+                        print(content, end="")  # Print the content to stream it out
+                except Exception as e:
+                    # Best effort: a malformed chunk is logged and skipped.
+                    logger.error(f"Error processing chunk: {e}")
+        return {"generated_text": response_text}
+    except HTTPException:
+        # Keep the upstream status code instead of masking it as a 500.
+        raise
+    except Exception as e:
+        logger.error(f"Error generating text: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+# Add a root endpoint for health checks
+@app.get("/")
+async def root():
+    """Return a static welcome message; takes no input."""
+    welcome = {"message": "Welcome to the NVIDIA Text Generation API!"}
+    return welcome
+# Add a test endpoint
+@app.get("/test")
+async def test():
+    """Return a static message indicating the API is running."""
+    status = {"message": "API is running!"}
+    return status