api-xylaria

Sleeping

App Files Community

khurrameycon committed on Jan 1

Commit

57efdb3

verified ·

1 Parent(s): 9196e30

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -12

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 class ModelInput(BaseModel):
     prompt: str
@@ -8,34 +10,46 @@ class ModelInput(BaseModel):
 app = FastAPI()
-# Since we're getting config errors with PEFT, let's load the fine-tuned model directly
-model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
 try:
-    # Load the model and tokenizer directly from your fine-tuned version
     model = AutoModelForCausalLM.from_pretrained(
-        model_path,
         trust_remote_code=True,
         device_map="auto"
     )
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-    print("Model loaded successfully!")
 except Exception as e:
-    print(f"Error loading model: {e}")
     raise
 def generate_response(model, tokenizer, instruction, max_new_tokens=128):
     """Generate a response from the model based on an instruction."""
     try:
-        # Format the input
         messages = [{"role": "user", "content": instruction}]
         input_text = tokenizer.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True
         )
-        # Generate
         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
         outputs = model.generate(
             inputs,
@@ -45,7 +59,6 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):
             do_sample=True,
         )
-        # Decode
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response
@@ -54,7 +67,6 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):
 @app.post("/generate")
 async def generate_text(input: ModelInput):
-    """API endpoint to generate text."""
     try:
         response = generate_response(
             model=model,

 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+from huggingface_hub import snapshot_download
 class ModelInput(BaseModel):
     prompt: str
 app = FastAPI()
+# Define model paths
+base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
+adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
 try:
+    # First load the base model
+    print("Loading base model...")
     model = AutoModelForCausalLM.from_pretrained(
+        base_model_path,
+        torch_dtype=torch.float16,
         trust_remote_code=True,
         device_map="auto"
     )
+    # Load tokenizer from base model
+    print("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+    # Download and load adapter weights
+    print("Loading adapter weights...")
+    adapter_path_local = snapshot_download(adapter_path)
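+    # Load the adapter weights into the base model.
+    # NOTE(review): torch.load expects a pickle/zip checkpoint, not a .safetensors file, and
+    # strict=False silently skips keys that do not match the base model — confirm the adapter is really applied.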
+    state_dict = torch.load(f"{adapter_path_local}/adapter_model.safetensors")
+    model.load_state_dict(state_dict, strict=False)
+    print("Model and adapter loaded successfully!")
 except Exception as e:
+    print(f"Error during model loading: {e}")
     raise
 def generate_response(model, tokenizer, instruction, max_new_tokens=128):
     """Generate a response from the model based on an instruction."""
     try:
         messages = [{"role": "user", "content": instruction}]
         input_text = tokenizer.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True
         )
         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
         outputs = model.generate(
             inputs,
             do_sample=True,
         )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response
 @app.post("/generate")
 async def generate_text(input: ModelInput):
     try:
         response = generate_response(
             model=model,