Spaces:

Fred808
/

808-GPT2

Running

App Files Community

Fred808 committed 22 days ago

Commit

e0e5738

verified ·

1 Parent(s): 4b7f924

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -8

app.py CHANGED Viewed

@@ -2,12 +2,17 @@ from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
 # Initialize FastAPI app
 app = FastAPI()
-# Load the latest Falcon-7B model with 8-bit quantization (if CUDA is available)
-model_id = "tiiuae/falcon-7b-instruct"  # Update this if there's a newer version
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Check if CUDA is available
@@ -15,16 +20,16 @@ if torch.cuda.is_available():
     # Load the model with 8-bit quantization for GPU
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        load_in_8bit=True,  # Use 8-bit quantization for GPU
-        device_map="auto",  # Automatically map the model to available devices
-        trust_remote_code=True  # Required for Falcon models
     )
 else:
     # Fallback to CPU or full precision
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto",  # Automatically map the model to available devices
-        trust_remote_code=True  # Required for Falcon models
     )
 # Create a text generation pipeline
@@ -43,7 +48,7 @@ class TextGenerationRequest(BaseModel):
 @app.post("/generate-text")
 async def generate_text(request: TextGenerationRequest):
     try:
-        # Generate text using the pipeline
         outputs = pipe(
             request.prompt,
             max_new_tokens=request.max_new_tokens,
@@ -54,6 +59,7 @@ async def generate_text(request: TextGenerationRequest):
         )
         return {"generated_text": outputs[0]["generated_text"]}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 # Add a root endpoint for health checks

 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
+import logging
 # Initialize FastAPI app
 app = FastAPI()
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Load the Falcon-7B model with 8-bit quantization (if CUDA is available)
+model_id = "tiiuae/falcon-7b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Check if CUDA is available
     # Load the model with 8-bit quantization for GPU
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        revision="main",  # Tracks the "main" branch; use a commit hash to pin an exact revision
+        load_in_8bit=True,
+        device_map="auto"
     )
 else:
     # Fallback to CPU or full precision
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        revision="main",  # Tracks the "main" branch; use a commit hash to pin an exact revision
+        device_map="auto"
     )
 # Create a text generation pipeline
 @app.post("/generate-text")
 async def generate_text(request: TextGenerationRequest):
     try:
+        logger.info("Generating text...")
         outputs = pipe(
             request.prompt,
             max_new_tokens=request.max_new_tokens,
         )
         return {"generated_text": outputs[0]["generated_text"]}
     except Exception as e:
+        logger.error(f"Error generating text: {e}")
         raise HTTPException(status_code=500, detail=str(e))
 # Add a root endpoint for health checks