Buildwellai committed (verified)
Commit af0695e · 1 parent: e49910f

Update handler.py: wait for adapter_config.json to appear and verify its contents before loading the LoRA adapter

Files changed (1): handler.py (+46, -10)
handler.py CHANGED
@@ -1,8 +1,10 @@
 import os
+import time  # Import the time module
+import json
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
 import torch
-from peft import PeftModel  # Import PeftModel
+from peft import PeftModel
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -12,22 +14,56 @@ class EndpointHandler:
         """
         Initializes the model and tokenizer.
         """
-        # Key settings (from environment variables, with defaults)
         max_seq_length = int(os.getenv("MAX_SEQ_LENGTH", 2048))
         max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 512))
         self.hf_token = os.getenv("HUGGINGFACE_TOKEN")
-        self.model_dir = os.getenv("MODEL_DIR", ".")  # Should be "." for root
-        self.base_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"  # Base model!
+        self.model_dir = os.getenv("MODEL_DIR", ".")
+        self.base_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
 
         print(f"MODEL_DIR: {self.model_dir}")
-        print(f"Files in model directory: {os.listdir(self.model_dir)}")
+        print(f"Files in model directory (initial): {os.listdir(self.model_dir)}")
 
-        # Load Config (with trust_remote_code)
+        # --- Wait for adapter_config.json ---
+        config_path = os.path.join(self.model_dir, "adapter_config.json")
+        max_wait_time = 60  # Wait up to 60 seconds
+        wait_interval = 2  # Check every 2 seconds
+        start_time = time.time()
+
+        while not os.path.exists(config_path):
+            print(f"Waiting for adapter_config.json to appear...")
+            time.sleep(wait_interval)
+            if time.time() - start_time > max_wait_time:
+                raise FileNotFoundError(
+                    f"adapter_config.json not found after {max_wait_time} seconds."
+                )
+        print("adapter_config.json found!")
+
+        # --- Verify adapter_config.json contents ---
+        try:
+            with open(config_path, "r") as f:
+                adapter_config = json.load(f)
+            # Check for essential keys
+            if "base_model_name_or_path" not in adapter_config or \
+               "task_type" not in adapter_config:
+                raise ValueError("adapter_config.json is missing required keys.")
+            if adapter_config["base_model_name_or_path"] != self.base_model_name:
+                raise ValueError("adapter_config.json base_model_name_or_path mismatch.")
+            if adapter_config["task_type"] != "CAUSAL_LM":
+                raise ValueError("adapter_config.json task_type is incorrect.")
+            print("adapter_config.json contents verified.")
+
+        except (FileNotFoundError, json.JSONDecodeError, ValueError) as e:
+            raise Exception(f"Error verifying adapter_config.json: {e}")
+
+        print(f"Files in model directory (after wait): {os.listdir(self.model_dir)}")
+
+
+        # Load Config
         self.config = AutoConfig.from_pretrained(
             self.base_model_name, token=self.hf_token, trust_remote_code=True
         )
 
-        # Load Tokenizer (with trust_remote_code)
+        # Load Tokenizer
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(
                 self.base_model_name, token=self.hf_token, trust_remote_code=True
@@ -36,15 +72,15 @@ class EndpointHandler:
             print(f"Error loading tokenizer: {e}")
             raise
 
-        # Load Model and LoRA Adapter (with trust_remote_code)
+        # Load Model
         try:
             base_model = AutoModelForCausalLM.from_pretrained(
                 self.base_model_name,
                 config=self.config,
-                torch_dtype=torch.bfloat16,  # Use bfloat16
+                torch_dtype=torch.bfloat16,
                 token=self.hf_token,
                 device_map="auto",
-                trust_remote_code=True,  # Important for Qwen2
+                trust_remote_code=True,
             )
             self.model = PeftModel.from_pretrained(base_model, self.model_dir)
 
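The heart of this commit is the poll-and-validate block in the second hunk. As a minimal sketch (not part of the commit), the same behavior could be factored into a standalone helper so the timeout and parsing logic are testable in isolation; the helper name wait_for_adapter_config and its defaults are illustrative, and everything it uses is from the standard library:

import json
import os
import time

def wait_for_adapter_config(model_dir: str, timeout: float = 60.0, interval: float = 2.0) -> dict:
    """Poll model_dir until adapter_config.json exists, then parse and return it.

    Hypothetical refactor of the inline logic above; not part of the commit.
    """
    config_path = os.path.join(model_dir, "adapter_config.json")
    deadline = time.time() + timeout
    while not os.path.exists(config_path):
        if time.time() > deadline:
            raise FileNotFoundError(f"adapter_config.json not found after {timeout} seconds.")
        time.sleep(interval)
    with open(config_path, "r") as f:
        return json.load(f)

With a helper like this, __init__ shrinks to one call plus the three key checks, and the polling behavior can be unit-tested against a temporary directory.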
 
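For context on how this file is used: Hugging Face Inference Endpoints loads a custom handler.py and calls an EndpointHandler instance with a JSON-like dict. The __call__ method sits outside the hunks shown here, so the smoke test below is a sketch under the assumption that it exists and follows the usual {"inputs": ...} convention; the prompt string is made up:

# Local smoke test of the handler contract. Assumes EndpointHandler also
# defines __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]],
# which this diff does not show, and that __init__ takes no required args.
from handler import EndpointHandler

handler = EndpointHandler()  # waits for adapter_config.json, then loads base model + adapter
payload = {"inputs": "Hello"}  # hypothetical request body
print(handler(payload))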
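One note on the last line of the third hunk: PeftModel.from_pretrained keeps the LoRA adapter as a separate module that is applied on every forward pass, and device_map="auto" additionally requires the accelerate package at runtime. If the adapter never needs to be swapped out, PEFT's merge_and_unload() can fold the LoRA weights into the base model after loading. A sketch of that optional follow-up inside __init__, reusing the attribute names above (not part of this commit):

# Optional (not in this commit): merge the LoRA weights into the base model
# so inference runs on a plain transformers model with no adapter overhead.
self.model = PeftModel.from_pretrained(base_model, self.model_dir)
self.model = self.model.merge_and_unload()  # PEFT's merge API for LoRA adapters
self.model.eval()  # inference mode: disables dropout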