Update app.py
Browse files
app.py
CHANGED
@@ -26,7 +26,7 @@ model_id = "google/gemma-2b" # Use Google Gemma 2B
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
27 |
|
28 |
# Load the model without quantization for CPU
|
29 |
-
logger.info("Loading model
|
30 |
model = AutoModelForCausalLM.from_pretrained(
|
31 |
model_id,
|
32 |
torch_dtype=torch.float32, # Use FP32 for CPU compatibility
|
@@ -37,8 +37,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
37 |
pipe = pipeline(
|
38 |
"text-generation",
|
39 |
model=model,
|
40 |
-
tokenizer=tokenizer
|
41 |
-
device="cpu" # Explicitly set device to CPU
|
42 |
)
|
43 |
|
44 |
# Define request body schema
|
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
27 |
|
28 |
# Load the model without quantization for CPU
|
29 |
+
logger.info("Loading model...")
|
30 |
model = AutoModelForCausalLM.from_pretrained(
|
31 |
model_id,
|
32 |
torch_dtype=torch.float32, # Use FP32 for CPU compatibility
|
|
|
37 |
pipe = pipeline(
|
38 |
"text-generation",
|
39 |
model=model,
|
40 |
+
tokenizer=tokenizer
|
|
|
41 |
)
|
42 |
|
43 |
# Define request body schema
|