Update app.py
app.py CHANGED
@@ -11,26 +11,10 @@ app = FastAPI()
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-# Load the
-model_id = "
+# Load the GPT-2 model and tokenizer
+model_id = "gpt2"  # Use GPT-2
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-# Check if CUDA is available
-if torch.cuda.is_available():
-    # Load the model with 8-bit quantization for GPU
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        revision="main",  # Pin to a specific revision
-        load_in_8bit=True,
-        device_map="auto"
-    )
-else:
-    # Fallback to CPU or full precision
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        revision="main",  # Pin to a specific revision
-        device_map="auto"
-    )
+model = AutoModelForCausalLM.from_pretrained(model_id)
 
 # Create a text generation pipeline
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
@@ -38,8 +22,8 @@ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 # Define request body schema
 class TextGenerationRequest(BaseModel):
     prompt: str
-    max_new_tokens: int = 50
-    temperature: float = 0.7
+    max_new_tokens: int = 50  # Reduce this for faster responses
+    temperature: float = 0.7  # Lower for more deterministic outputs
     top_k: int = 50
     top_p: float = 0.9
     do_sample: bool = True
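For context, a minimal sketch of how a FastAPI route could consume this request schema with the pipeline defined above. The route handler is not part of this diff, so the endpoint path and function name below are assumptions; only `app`, `logger`, `pipe`, and `TextGenerationRequest` come from app.py itself.

@app.post("/generate")  # hypothetical route name, not shown in this diff
def generate(request: TextGenerationRequest):
    logger.info("Generating text for a prompt of %d characters", len(request.prompt))
    # Pass the schema fields straight through to the transformers text-generation pipeline
    outputs = pipe(
        request.prompt,
        max_new_tokens=request.max_new_tokens,
        temperature=request.temperature,
        top_k=request.top_k,
        top_p=request.top_p,
        do_sample=request.do_sample,
    )
    # The pipeline returns a list of dicts, each with a "generated_text" key
    return {"generated_text": outputs[0]["generated_text"]}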