Buildwellai committed
Commit 7cd4d08 · verified · 1 Parent(s): 1eaeebf

Update handler.py

Files changed (1)
  1. handler.py +23 -30
handler.py CHANGED
@@ -2,7 +2,7 @@ import os
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
 import torch
-from peft import PeftModel # Import PeftModel
+from peft import PeftModel
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -12,64 +12,60 @@ class EndpointHandler:
         """
         Initializes the model and tokenizer.
         """
-        # Key settings (from environment variables, with defaults)
         max_seq_length = int(os.getenv("MAX_SEQ_LENGTH", 2048))
         max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 512))
         self.hf_token = os.getenv("HUGGINGFACE_TOKEN")
-        self.model_dir = os.getenv("MODEL_DIR", ".")
-        self.base_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" # The base model ID
+        self.model_dir = os.getenv("MODEL_DIR", ".") # Should be "." for root
+        self.base_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" # Base model!
 
         print(f"MODEL_DIR: {self.model_dir}")
         print(f"Files in model directory: {os.listdir(self.model_dir)}")
 
-        # --- 1. Load Config ---
-        self.config = AutoConfig.from_pretrained(self.base_model_name, token=self.hf_token, trust_remote_code=True)
+        # Load Config (with trust_remote_code)
+        self.config = AutoConfig.from_pretrained(
+            self.base_model_name, token=self.hf_token, trust_remote_code=True
+        )
 
-        # --- 2. Load Tokenizer ---
+        # Load Tokenizer (with trust_remote_code)
         try:
-            self.tokenizer = AutoTokenizer.from_pretrained(self.base_model_name, token=self.hf_token, trust_remote_code=True)
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.base_model_name, token=self.hf_token, trust_remote_code=True
+            )
         except Exception as e:
             print(f"Error loading tokenizer: {e}")
             raise
 
-        # --- 3. Load Model with PeftModel ---
+        # Load Model and LoRA Adapter (with trust_remote_code)
         try:
-            # Load base model
-            base_model = AutoModelForCausalLM.from_pretrained(
+            base_model = AutoModelForCausalLM.from_pretrained(
                 self.base_model_name,
                 config=self.config,
-                torch_dtype=torch.bfloat16,
+                torch_dtype=torch.bfloat16, # Use bfloat16
                 token=self.hf_token,
                 device_map="auto",
-                trust_remote_code=True,
+                trust_remote_code=True, # Important for Qwen2
             )
-
-            # Load the LoRA model using PeftModel
             self.model = PeftModel.from_pretrained(base_model, self.model_dir)
-            # No need for FastLanguageModel.for_inference() here, PeftModel handles it
 
         except Exception as e:
             print(f"Error loading model: {e}")
             raise
 
-        # Define the prompt style
-        self.prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
-Write a response that appropriately completes the request.
+        self.prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
+Write a response that appropriately completes the request.
 Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
+
 ### Instruction:
-You are BuildwellAI, an AI assistant specialized in UK building regulations and construction standards. You provide accurate, helpful information about building codes, construction best practices, and regulatory compliance in the UK.
-Always be professional and precise in your responses.
+You are BuildwellAI, an AI assistant specialized in UK building regulations and construction standards. You provide accurate, helpful information about building codes, construction best practices, and regulatory compliance in the UK.
+Always be professional and precise in your responses..
+
 ### Question:
 {}
+
 ### Response:
 <think>{}"""
 
-
-
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
-        """
-        Processes the input and generates a response.
-        """
        inputs = data.pop("inputs", None)
         if inputs is None:
             return [{"error": "No input provided. 'inputs' key missing."}]
@@ -77,14 +73,11 @@ Always be professional and precise in your responses.
             return [{"error": "Invalid input type. 'inputs' must be a string."}]
 
         input_text = self.prompt_style.format(inputs, "")
-
-        # Tokenize and move to CUDA (if available)
         input_tokens = self.tokenizer([input_text], return_tensors="pt")
         if torch.cuda.is_available():
             input_tokens = input_tokens.to("cuda")
 
-
-        with torch.no_grad(): # Ensure no gradient calculation
+        with torch.no_grad():
             output_tokens = self.model.generate(
                 input_ids=input_tokens.input_ids,
                 attention_mask=input_tokens.attention_mask,
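What the commit lands on is the plain transformers-plus-peft loading path: load the full DeepSeek-R1-Distill-Qwen-7B base checkpoint first, then attach the fine-tuned LoRA adapter stored in MODEL_DIR with PeftModel.from_pretrained (the deleted FastLanguageModel comment points to an earlier unsloth-based loader). A minimal standalone sketch of that pattern, assuming the adapter files (adapter_config.json plus the adapter weights) sit in the directory the handler calls MODEL_DIR:

```python
# Minimal sketch of the base-model + LoRA-adapter load this commit switches to.
# Assumption: adapter_config.json and the adapter weights live in adapter_dir.
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

base_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
adapter_dir = "."  # the handler reads this from MODEL_DIR, defaulting to the repo root

base = AutoModelForCausalLM.from_pretrained(
    base_id,
    torch_dtype=torch.bfloat16,  # same dtype the handler picks
    device_map="auto",           # place layers on available devices automatically
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base, adapter_dir)  # wrap the base with LoRA weights
model.eval()

# Optional: fold the adapter into the base weights, removing the PEFT
# indirection at inference time (not done in the handler above):
# model = model.merge_and_unload()
```

Keeping the adapter separate, as the handler does, adds a little per-forward overhead but leaves the base weights untouched; merge_and_unload() trades that flexibility for speed.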
 
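The visible diff ends inside the generate() call, before any sampling arguments or decoding. For orientation, here is a hypothetical smoke test of the __call__ contract as the excerpt defines it: a {"inputs": <question string>} payload in, a list of dicts out, with error dicts for missing or non-string input. The no-argument constructor is an assumption from the excerpt, not confirmed by the full file:

```python
# Hypothetical smoke test for handler.py's EndpointHandler. Assumes the file is
# importable and that __init__ takes no required arguments, as the excerpt suggests.
from handler import EndpointHandler

handler = EndpointHandler()

# The guard clauses visible in the diff context:
print(handler({}))              # -> [{"error": "No input provided. 'inputs' key missing."}]
print(handler({"inputs": 42}))  # -> [{"error": "Invalid input type. 'inputs' must be a string."}]

# A real question is wrapped via self.prompt_style.format(inputs, ""): the empty
# second slot leaves the trailing "<think>" open, so generation starts with the
# model's chain-of-thought before the final answer.
print(handler({"inputs": "When is building control approval needed for a loft conversion?"}))
```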