Update handler.py
Browse files — handler.py (+46 lines, −10 lines)
handler.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
import os
|
|
|
|
|
2 |
from typing import Dict, List, Any
|
3 |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
|
4 |
import torch
|
5 |
-
from peft import PeftModel
|
6 |
from dotenv import load_dotenv
|
7 |
|
8 |
load_dotenv()
|
@@ -12,22 +14,56 @@ class EndpointHandler:
|
|
12 |
"""
|
13 |
Initializes the model and tokenizer.
|
14 |
"""
|
15 |
-
# Key settings (from environment variables, with defaults)
|
16 |
max_seq_length = int(os.getenv("MAX_SEQ_LENGTH", 2048))
|
17 |
max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 512))
|
18 |
self.hf_token = os.getenv("HUGGINGFACE_TOKEN")
|
19 |
-
self.model_dir = os.getenv("MODEL_DIR", ".")
|
20 |
-
self.base_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
|
21 |
|
22 |
print(f"MODEL_DIR: {self.model_dir}")
|
23 |
-
print(f"Files in model directory: {os.listdir(self.model_dir)}")
|
24 |
|
25 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
self.config = AutoConfig.from_pretrained(
|
27 |
self.base_model_name, token=self.hf_token, trust_remote_code=True
|
28 |
)
|
29 |
|
30 |
-
# Load Tokenizer
|
31 |
try:
|
32 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
33 |
self.base_model_name, token=self.hf_token, trust_remote_code=True
|
@@ -36,15 +72,15 @@ class EndpointHandler:
|
|
36 |
print(f"Error loading tokenizer: {e}")
|
37 |
raise
|
38 |
|
39 |
-
# Load Model
|
40 |
try:
|
41 |
base_model = AutoModelForCausalLM.from_pretrained(
|
42 |
self.base_model_name,
|
43 |
config=self.config,
|
44 |
-
torch_dtype=torch.bfloat16,
|
45 |
token=self.hf_token,
|
46 |
device_map="auto",
|
47 |
-
trust_remote_code=True,
|
48 |
)
|
49 |
self.model = PeftModel.from_pretrained(base_model, self.model_dir)
|
50 |
|
|
|
1 |
import os
|
2 |
+
import time  # used below to poll/sleep while waiting for adapter_config.json to appear
|
3 |
+
import json
|
4 |
from typing import Dict, List, Any
|
5 |
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
|
6 |
import torch
|
7 |
+
from peft import PeftModel
|
8 |
from dotenv import load_dotenv
|
9 |
|
10 |
load_dotenv()
|
|
|
14 |
"""
|
15 |
Initializes the model and tokenizer.
|
16 |
"""
|
|
|
17 |
max_seq_length = int(os.getenv("MAX_SEQ_LENGTH", 2048))
|
18 |
max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 512))
|
19 |
self.hf_token = os.getenv("HUGGINGFACE_TOKEN")
|
20 |
+
self.model_dir = os.getenv("MODEL_DIR", ".")
|
21 |
+
self.base_model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
|
22 |
|
23 |
print(f"MODEL_DIR: {self.model_dir}")
|
24 |
+
print(f"Files in model directory (initial): {os.listdir(self.model_dir)}")
|
25 |
|
26 |
+
# --- Wait for adapter_config.json ---
|
27 |
+
config_path = os.path.join(self.model_dir, "adapter_config.json")
|
28 |
+
max_wait_time = 60 # Wait up to 60 seconds
|
29 |
+
wait_interval = 2 # Check every 2 seconds
|
30 |
+
start_time = time.time()
|
31 |
+
|
32 |
+
while not os.path.exists(config_path):
|
33 |
+
print(f"Waiting for adapter_config.json to appear...")
|
34 |
+
time.sleep(wait_interval)
|
35 |
+
if time.time() - start_time > max_wait_time:
|
36 |
+
raise FileNotFoundError(
|
37 |
+
f"adapter_config.json not found after {max_wait_time} seconds."
|
38 |
+
)
|
39 |
+
print("adapter_config.json found!")
|
40 |
+
|
41 |
+
# --- Verify adapter_config.json contents ---
|
42 |
+
try:
|
43 |
+
with open(config_path, "r") as f:
|
44 |
+
adapter_config = json.load(f)
|
45 |
+
# Check for essential keys
|
46 |
+
if "base_model_name_or_path" not in adapter_config or \
|
47 |
+
"task_type" not in adapter_config:
|
48 |
+
raise ValueError("adapter_config.json is missing required keys.")
|
49 |
+
if adapter_config["base_model_name_or_path"] != self.base_model_name:
|
50 |
+
raise ValueError("adapter_config.json base_model_name_or_path mismatch.")
|
51 |
+
if adapter_config["task_type"] != "CAUSAL_LM":
|
52 |
+
raise ValueError("adapter_config.json task_type is incorrect.")
|
53 |
+
print("adapter_config.json contents verified.")
|
54 |
+
|
55 |
+
except (FileNotFoundError, json.JSONDecodeError, ValueError) as e:
|
56 |
+
raise Exception(f"Error verifying adapter_config.json: {e}")
|
57 |
+
|
58 |
+
print(f"Files in model directory (after wait): {os.listdir(self.model_dir)}")
|
59 |
+
|
60 |
+
|
61 |
+
# Load Config
|
62 |
self.config = AutoConfig.from_pretrained(
|
63 |
self.base_model_name, token=self.hf_token, trust_remote_code=True
|
64 |
)
|
65 |
|
66 |
+
# Load Tokenizer
|
67 |
try:
|
68 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
69 |
self.base_model_name, token=self.hf_token, trust_remote_code=True
|
|
|
72 |
print(f"Error loading tokenizer: {e}")
|
73 |
raise
|
74 |
|
75 |
+
# Load Model
|
76 |
try:
|
77 |
base_model = AutoModelForCausalLM.from_pretrained(
|
78 |
self.base_model_name,
|
79 |
config=self.config,
|
80 |
+
torch_dtype=torch.bfloat16,
|
81 |
token=self.hf_token,
|
82 |
device_map="auto",
|
83 |
+
trust_remote_code=True,
|
84 |
)
|
85 |
self.model = PeftModel.from_pretrained(base_model, self.model_dir)
|
86 |
|