Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,6 @@ print(torch.__version__)
|
|
9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
10 |
import gradio as gr
|
11 |
from threading import Thread
|
12 |
-
import bitsandbytes as bnb
|
13 |
|
14 |
MODEL_BIG = "HuggingFaceTB/SmolLM-360M-Instruct"
|
15 |
MODEL_SMALL = "HuggingFaceTB/SmolLM-135M-Instruct"
|
@@ -37,11 +36,11 @@ END_MESSAGE = """
|
|
37 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_SMALL)
|
38 |
model_big = AutoModelForCausalLM.from_pretrained(
|
39 |
MODEL_BIG,
|
40 |
-
|
41 |
device_map="auto")
|
42 |
model_small = AutoModelForCausalLM.from_pretrained(
|
43 |
MODEL_SMALL,
|
44 |
-
|
45 |
device_map="auto")
|
46 |
|
47 |
if model_big.device == "cuda":
|
|
|
9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
10 |
import gradio as gr
|
11 |
from threading import Thread
|
|
|
12 |
|
13 |
MODEL_BIG = "HuggingFaceTB/SmolLM-360M-Instruct"
|
14 |
MODEL_SMALL = "HuggingFaceTB/SmolLM-135M-Instruct"
|
|
|
36 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_SMALL)
|
37 |
model_big = AutoModelForCausalLM.from_pretrained(
|
38 |
MODEL_BIG,
|
39 |
+
torch_dtype=torch.bfloat16,
|
40 |
device_map="auto")
|
41 |
model_small = AutoModelForCausalLM.from_pretrained(
|
42 |
MODEL_SMALL,
|
43 |
+
torch_dtype=torch.bfloat16,
|
44 |
device_map="auto")
|
45 |
|
46 |
if model_big.device == "cuda":
|