Update app.py
app.py
CHANGED
@@ -6,8 +6,8 @@ import torch
 # Initialize FastAPI app
 app = FastAPI()

-# Load the Falcon-7B model with 8-bit quantization (if CUDA is available)
-model_id = "tiiuae/falcon-7b-instruct"
+# Load the latest Falcon-7B model with 8-bit quantization (if CUDA is available)
+model_id = "tiiuae/falcon-7b-instruct"  # Update this if there's a newer version
 tokenizer = AutoTokenizer.from_pretrained(model_id)

 # Check if CUDA is available
@@ -15,16 +15,16 @@ if torch.cuda.is_available():
     # Load the model with 8-bit quantization for GPU
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        load_in_8bit=True,
-        device_map="auto",
-        trust_remote_code=True
+        load_in_8bit=True,  # Use 8-bit quantization for GPU
+        device_map="auto",  # Automatically map the model to available devices
+        trust_remote_code=True  # Required for Falcon models
     )
 else:
     # Fallback to CPU or full precision
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto",
-        trust_remote_code=True
+        device_map="auto",  # Automatically map the model to available devices
+        trust_remote_code=True  # Required for Falcon models
     )

 # Create a text generation pipeline
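Note that recent transformers releases deprecate passing load_in_8bit directly to from_pretrained in favor of a BitsAndBytesConfig. A minimal sketch of the equivalent GPU branch under that newer API, assuming bitsandbytes is installed and a CUDA device is visible (not part of this commit):

# Sketch only: the same 8-bit load expressed with BitsAndBytesConfig, which newer
# transformers releases expect instead of the bare load_in_8bit flag.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

if torch.cuda.is_available():
    quant_config = BitsAndBytesConfig(load_in_8bit=True)  # 8-bit weights via bitsandbytes
    model = AutoModelForCausalLM.from_pretrained(
        "tiiuae/falcon-7b-instruct",
        quantization_config=quant_config,  # replaces the deprecated load_in_8bit=True argument
        device_map="auto",                 # spread layers across available devices
        trust_remote_code=True,            # Falcon ships custom modeling code
    )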
@@ -59,4 +59,4 @@ async def generate_text(request: TextGenerationRequest):
 # Add a root endpoint for health checks
 @app.get("/test")
 async def root():
-    return {"message": "API is running!"}
+    return {"message": "API is running!"}
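The unchanged context referenced by the hunk headers includes a text generation pipeline and a generate_text endpoint that takes a TextGenerationRequest. A minimal sketch of how those pieces typically fit together; the /generate route and the prompt/max_new_tokens fields are assumptions for illustration and do not appear in this diff:

# Sketch of the unchanged pieces referenced by the diff context. The route name and
# request fields (other than the TextGenerationRequest class itself) are assumptions.
from pydantic import BaseModel
from transformers import pipeline

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

class TextGenerationRequest(BaseModel):
    prompt: str                 # assumed field
    max_new_tokens: int = 128   # assumed field

@app.post("/generate")          # assumed route; only /test appears in this diff
async def generate_text(request: TextGenerationRequest):
    outputs = generator(request.prompt, max_new_tokens=request.max_new_tokens)
    return {"generated_text": outputs[0]["generated_text"]}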
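Since the edits are comment-level, behaviour should be unchanged, and the /test health-check route can serve as a quick smoke test. A sketch using FastAPI's TestClient; importing app triggers the full Falcon-7B load, so this is only practical on a machine that can hold the model:

# Quick smoke test for the /test route; assumes httpx is installed for TestClient.
from fastapi.testclient import TestClient

from app import app  # note: importing app loads the Falcon-7B weights

client = TestClient(app)
response = client.get("/test")
assert response.status_code == 200
assert response.json() == {"message": "API is running!"}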