khurrameycon committed
Commit 57efdb3 · verified · 1 Parent(s): 9196e30

Update app.py

Files changed (1):
  app.py  +24 -12
app.py CHANGED
@@ -1,6 +1,8 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+from huggingface_hub import snapshot_download
 
 class ModelInput(BaseModel):
     prompt: str
@@ -8,34 +10,46 @@ class ModelInput(BaseModel):
 
 app = FastAPI()
 
-# Since we're getting config errors with PEFT, let's load the fine-tuned model directly
-model_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
+# Define model paths
+base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
+adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"
 
 try:
-    # Load the model and tokenizer directly from your fine-tuned version
+    # First load the base model
+    print("Loading base model...")
     model = AutoModelForCausalLM.from_pretrained(
-        model_path,
+        base_model_path,
+        torch_dtype=torch.float16,
         trust_remote_code=True,
         device_map="auto"
     )
 
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-    print("Model loaded successfully!")
+    # Load tokenizer from base model
+    print("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
+
+    # Download and load adapter weights
+    print("Loading adapter weights...")
+    adapter_path_local = snapshot_download(adapter_path)
+
+    # Load the adapter weights
+    state_dict = torch.load(f"{adapter_path_local}/adapter_model.safetensors")
+    model.load_state_dict(state_dict, strict=False)
+
+    print("Model and adapter loaded successfully!")
 
 except Exception as e:
-    print(f"Error loading model: {e}")
+    print(f"Error during model loading: {e}")
     raise
 
 def generate_response(model, tokenizer, instruction, max_new_tokens=128):
     """Generate a response from the model based on an instruction."""
     try:
-        # Format the input
         messages = [{"role": "user", "content": instruction}]
         input_text = tokenizer.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True
         )
 
-        # Generate
        inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
         outputs = model.generate(
             inputs,
@@ -45,7 +59,6 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):
             do_sample=True,
         )
 
-        # Decode
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response
 
@@ -54,7 +67,6 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=128):
 
 @app.post("/generate")
 async def generate_text(input: ModelInput):
-    """API endpoint to generate text."""
     try:
         response = generate_response(
             model=model,
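
A note on the new adapter-loading step: adapter_model.safetensors is stored in the safetensors format, which torch.load cannot read (it only handles pickle-based checkpoints), so the torch.load(...) call added in this commit is likely to raise at startup. Below is a minimal sketch of how those weights could be read instead, assuming the adapter repo contains adapter_model.safetensors as named in the diff; the use of safetensors.torch.load_file (and the alternative of PEFT's PeftModel.from_pretrained) is a suggestion, not part of the commit.

# Sketch only (not part of this commit): read the adapter checkpoint with the
# safetensors loader instead of torch.load, which cannot parse that format.
import torch
from huggingface_hub import snapshot_download
from safetensors.torch import load_file
from transformers import AutoModelForCausalLM

base_model_path = "HuggingFaceTB/SmolLM2-135M-Instruct"
adapter_path = "khurrameycon/SmolLM-135M-Instruct-qa_pairs_converted.json-25epochs"

model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
)

# Download the adapter repo and load its safetensors file directly.
adapter_dir = snapshot_download(adapter_path)
state_dict = load_file(f"{adapter_dir}/adapter_model.safetensors")

# LoRA-style adapter tensors rarely map 1:1 onto base-model parameter names,
# so strict=False (as in the commit) mostly reports them as unexpected keys;
# with the peft package installed, PeftModel.from_pretrained(model, adapter_path)
# would apply the adapter properly instead.
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print(f"missing keys: {len(missing)}, unexpected keys: {len(unexpected)}")

Everything else in the sketch (base model, float16, tokenizer from the base repo) follows the committed code as-is; only the way the checkpoint file is read differs.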