Update app.py
app.py
CHANGED
@@ -96,10 +96,9 @@
 
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoAdapterModel
 import torch
 from huggingface_hub import snapshot_download
-from safetensors.torch import load_file
 
 class ModelInput(BaseModel):
     prompt: str
@@ -120,26 +119,22 @@ try:
         trust_remote_code=True,
         device_map="auto"
     )
-
+
     # Load tokenizer from base model
     print("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-
+
     # Download adapter weights
     print("Downloading adapter weights...")
     adapter_path_local = snapshot_download(adapter_path)
-
-    # Load the
-    print("Loading adapter
-
-
-    #
-    model.
-
-    # Optional: Set the model to use the adapter
-    # In case you are using adapters, you need to activate them
-    model.set_active_adapters(adapter_path) # Activating the adapter
-
+
+    # Load the adapter model
+    print("Loading adapter model...")
+    adapter_model = AutoAdapterModel.from_pretrained(adapter_path_local, from_pt=True)
+
+    # Combine the base model and adapter
+    model = model.with_adapter(adapter_model)
+
     print("Model and adapter loaded successfully!")
 
 except Exception as e:
@@ -153,7 +148,7 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):
         input_text = tokenizer.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True
         )
-
+
         inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
         outputs = model.generate(
             inputs,
@@ -162,10 +157,10 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):
             top_p=0.9,
             do_sample=True,
         )
-
+
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response
-
+
     except Exception as e:
         raise ValueError(f"Error generating response: {e}")
 
@@ -179,7 +174,7 @@ async def generate_text(input: ModelInput):
             max_new_tokens=input.max_new_tokens
        )
         return {"generated_text": response}
-
+
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
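A note on the added lines: core transformers does not export AutoAdapterModel or a with_adapter() method. AutoAdapterModel comes from the AdapterHub ecosystem (the old adapter-transformers fork, now the standalone adapters package), which is also where the set_active_adapters() call in the removed lines comes from, so this import will fail against a stock transformers install. If the adapter was trained with PEFT (LoRA/QLoRA), the usual load-and-attach flow is a PeftModel wrapper. A minimal sketch under that assumption follows; the repo IDs are hypothetical placeholders for the base_model_path and adapter_path values defined earlier in app.py.

# Minimal sketch, assuming a PEFT (LoRA) adapter rather than an
# AdapterHub adapter. Repo IDs are placeholders, not the app's real ones.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_path = "org/base-model"   # placeholder
adapter_path = "user/adapter-repo"   # placeholder

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# PeftModel.from_pretrained resolves the adapter repo on the Hub itself and
# wraps the base model so that generate() runs with the adapter applied.
model = PeftModel.from_pretrained(base_model, adapter_path)

With this route the explicit snapshot_download() step becomes unnecessary, since PeftModel.from_pretrained accepts a Hub repo ID directly and handles the download.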