hayas committed on
Commit ee17e04 · 1 Parent(s): 51b956b
.pre-commit-config.yaml ADDED
@@ -0,0 +1,33 @@
+ repos:
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v5.0.0
+     hooks:
+       - id: check-executables-have-shebangs
+       - id: check-json
+       - id: check-merge-conflict
+       - id: check-shebang-scripts-are-executable
+       - id: check-toml
+       - id: check-yaml
+       - id: end-of-file-fixer
+       - id: mixed-line-ending
+         args: ["--fix=lf"]
+       - id: requirements-txt-fixer
+       - id: trailing-whitespace
+   - repo: https://github.com/astral-sh/ruff-pre-commit
+     rev: v0.9.6
+     hooks:
+       - id: ruff
+         args: ["--fix"]
+       - id: ruff-format
+   - repo: https://github.com/pre-commit/mirrors-mypy
+     rev: v1.15.0
+     hooks:
+       - id: mypy
+         args: ["--ignore-missing-imports"]
+         additional_dependencies:
+           [
+             "types-python-slugify",
+             "types-pytz",
+             "types-PyYAML",
+             "types-requests",
+           ]
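To enable these hooks in a local checkout, the standard pre-commit commands apply: `pre-commit install` registers the git hook once, and `pre-commit run --all-files` runs every hook against the whole tree.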
.python-version ADDED
@@ -0,0 +1 @@
+ 3.10
.vscode/extensions.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "recommendations": [
+     "ms-python.python",
+     "charliermarsh.ruff",
+     "streetsidesoftware.code-spell-checker",
+     "tamasfe.even-better-toml"
+   ]
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "editor.formatOnSave": true,
+   "files.insertFinalNewline": false,
+   "[python]": {
+     "editor.defaultFormatter": "charliermarsh.ruff",
+     "editor.formatOnType": true,
+     "editor.codeActionsOnSave": {
+       "source.fixAll.ruff": "explicit",
+       "source.organizeImports": "explicit"
+     }
+   },
+   "[jupyter]": {
+     "files.insertFinalNewline": false
+   },
+   "notebook.output.scrolling": true,
+   "notebook.formatOnSave.enabled": true
+ }
README.md CHANGED
@@ -1,8 +1,8 @@
  ---
  title: Deepseek R1 Distill Qwen2.5 Bakeneko 32b Awq
- emoji: 🦀
- colorFrom: yellow
- colorTo: yellow
+ emoji:
+ colorFrom: red
+ colorTo: purple
  sdk: gradio
  sdk_version: 5.16.0
  app_file: app.py
app.py ADDED
@@ -0,0 +1,102 @@
+ #!/usr/bin/env python
+
+ import os
+ from collections.abc import Iterator
+ from threading import Thread
+
+ import gradio as gr
+ import spaces
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+
+ DESCRIPTION = "# rinna/deepseek-r1-distill-qwen2.5-bakeneko-32b-awq"
+
+ MAX_MAX_NEW_TOKENS = 4096
+ DEFAULT_MAX_NEW_TOKENS = 4096
+ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+ model_id = "rinna/deepseek-r1-distill-qwen2.5-bakeneko-32b-awq"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     device_map="auto",
+     torch_dtype=torch.bfloat16,
+ )
+ model.eval()
+
+
+ @spaces.GPU(duration=120)
+ def generate(
+     message: str,
+     chat_history: list[dict],
+     max_new_tokens: int = 4096,
+     temperature: float = 0.6,
+     top_p: float = 0.95,
+ ) -> Iterator[str]:
+     messages = [*chat_history, {"role": "user", "content": message}]
+
+     input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
+     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+     input_ids = input_ids.to(model.device)
+
+     streamer = TextIteratorStreamer(tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
+     generate_kwargs = dict(
+         {"input_ids": input_ids},
+         streamer=streamer,
+         max_new_tokens=max_new_tokens,
+         do_sample=True,
+         top_p=top_p,
+         temperature=temperature,
+     )
+     t = Thread(target=model.generate, kwargs=generate_kwargs)
+     t.start()
+
+     outputs = []
+     for text in streamer:
+         outputs.append(text)
+         yield "".join(outputs)
+
+
+ demo = gr.ChatInterface(
+     fn=generate,
+     additional_inputs=[
+         gr.Slider(
+             label="Max new tokens",
+             minimum=1,
+             maximum=MAX_MAX_NEW_TOKENS,
+             step=1,
+             value=DEFAULT_MAX_NEW_TOKENS,
+         ),
+         gr.Slider(
+             label="Temperature",
+             minimum=0.1,
+             maximum=4.0,
+             step=0.1,
+             value=0.6,
+         ),
+         gr.Slider(
+             label="Top-p (nucleus sampling)",
+             minimum=0.05,
+             maximum=1.0,
+             step=0.05,
+             value=0.95,
+         ),
+     ],
+     stop_btn=None,
+     examples=[
+         ["微分に関する簡単な文章問題を作成し、その問題を解いてください。"],
+     ],
+     cache_examples=False,
+     type="messages",
+     description=DESCRIPTION,
+     css_paths="style.css",
+     fill_height=True,
+     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["think"]),
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
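For reference, `generate()` yields the cumulative decoded text on each streamer chunk rather than deltas, so a caller only needs the last value. A minimal local smoke test might look like this sketch, assuming a CUDA GPU with enough VRAM for the 32B AWQ checkpoint (`@spaces.GPU` is a no-op outside ZeroGPU hardware):

    # Hypothetical local smoke test, not part of the commit.
    from app import generate  # importing app.py downloads and loads the model

    final = ""
    for partial in generate("微分に関する簡単な文章問題を作成し、その問題を解いてください。", chat_history=[]):
        final = partial  # each yield is the full text so far, not a delta
    print(final)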
pyproject.toml ADDED
@@ -0,0 +1,57 @@
+ [project]
+ name = "deepseek-r1-distill-qwen2-5-bakeneko-32b-awq"
+ version = "0.1.0"
+ description = ""
+ readme = "README.md"
+ requires-python = ">=3.10"
+ dependencies = [
+     "accelerate>=1.3.0",
+     "autoawq>=0.2.7.post3",
+     "gradio>=5.16.0",
+     "hf-transfer>=0.1.9",
+     "spaces>=0.32.0",
+     "torch==2.4.0",
+     "transformers>=4.48.3",
+ ]
+
+ [tool.ruff]
+ line-length = 119
+
+ [tool.ruff.lint]
+ select = ["ALL"]
+ ignore = [
+     "COM812", # missing-trailing-comma
+     "D203", # one-blank-line-before-class
+     "D213", # multi-line-summary-second-line
+     "E501", # line-too-long
+     "SIM117", # multiple-with-statements
+ ]
+ extend-ignore = [
+     "D100", # undocumented-public-module
+     "D101", # undocumented-public-class
+     "D102", # undocumented-public-method
+     "D103", # undocumented-public-function
+     "D104", # undocumented-public-package
+     "D105", # undocumented-magic-method
+     "D107", # undocumented-public-init
+     "EM101", # raw-string-in-exception
+     "FBT001", # boolean-type-hint-positional-argument
+     "FBT002", # boolean-default-value-positional-argument
+     "PD901", # pandas-df-variable-name
+     "PGH003", # blanket-type-ignore
+     "PLR0913", # too-many-arguments
+     "PLR0915", # too-many-statements
+     "TRY003", # raise-vanilla-args
+ ]
+ unfixable = [
+     "F401", # unused-import
+ ]
+
+ [tool.ruff.lint.pydocstyle]
+ convention = "google"
+
+ [tool.ruff.lint.per-file-ignores]
+ "*.ipynb" = ["T201"]
+
+ [tool.ruff.format]
+ docstring-code-format = true
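The pinned `requirements.txt` below is compiled from these version ranges; per its own header it is regenerated with `uv pip compile pyproject.toml -o requirements.txt`, so dependency edits belong in this `[project]` table rather than in the compiled file.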
requirements.txt ADDED
@@ -0,0 +1,305 @@
+ # This file was autogenerated by uv via the following command:
+ #    uv pip compile pyproject.toml -o requirements.txt
+ accelerate==1.3.0
+     # via
+     #   deepseek-r1-distill-qwen2-5-bakeneko-32b-awq (pyproject.toml)
+     #   autoawq
+ aiofiles==23.2.1
+     # via gradio
+ aiohappyeyeballs==2.4.6
+     # via aiohttp
+ aiohttp==3.11.12
+     # via
+     #   datasets
+     #   fsspec
+ aiosignal==1.3.2
+     # via aiohttp
+ annotated-types==0.7.0
+     # via pydantic
+ anyio==4.8.0
+     # via
+     #   gradio
+     #   httpx
+     #   starlette
+ async-timeout==5.0.1
+     # via aiohttp
+ attrs==25.1.0
+     # via aiohttp
+ autoawq==0.2.7.post3
+     # via deepseek-r1-distill-qwen2-5-bakeneko-32b-awq (pyproject.toml)
+ certifi==2025.1.31
+     # via
+     #   httpcore
+     #   httpx
+     #   requests
+ charset-normalizer==3.4.1
+     # via requests
+ click==8.1.8
+     # via
+     #   typer
+     #   uvicorn
+ datasets==3.3.0
+     # via autoawq
+ dill==0.3.8
+     # via
+     #   datasets
+     #   multiprocess
+ exceptiongroup==1.2.2
+     # via anyio
+ fastapi==0.115.8
+     # via gradio
+ ffmpy==0.5.0
+     # via gradio
+ filelock==3.17.0
+     # via
+     #   datasets
+     #   huggingface-hub
+     #   torch
+     #   transformers
+     #   triton
+ frozenlist==1.5.0
+     # via
+     #   aiohttp
+     #   aiosignal
+ fsspec==2024.12.0
+     # via
+     #   datasets
+     #   gradio-client
+     #   huggingface-hub
+     #   torch
+ gradio==5.16.0
+     # via
+     #   deepseek-r1-distill-qwen2-5-bakeneko-32b-awq (pyproject.toml)
+     #   spaces
+ gradio-client==1.7.0
+     # via gradio
+ h11==0.14.0
+     # via
+     #   httpcore
+     #   uvicorn
+ hf-transfer==0.1.9
+     # via deepseek-r1-distill-qwen2-5-bakeneko-32b-awq (pyproject.toml)
+ httpcore==1.0.7
+     # via httpx
+ httpx==0.28.1
+     # via
+     #   gradio
+     #   gradio-client
+     #   safehttpx
+     #   spaces
+ huggingface-hub==0.28.1
+     # via
+     #   accelerate
+     #   autoawq
+     #   datasets
+     #   gradio
+     #   gradio-client
+     #   tokenizers
+     #   transformers
+ idna==3.10
+     # via
+     #   anyio
+     #   httpx
+     #   requests
+     #   yarl
+ jinja2==3.1.5
+     # via
+     #   gradio
+     #   torch
+ markdown-it-py==3.0.0
+     # via rich
+ markupsafe==2.1.5
+     # via
+     #   gradio
+     #   jinja2
+ mdurl==0.1.2
+     # via markdown-it-py
+ mpmath==1.3.0
+     # via sympy
+ multidict==6.1.0
+     # via
+     #   aiohttp
+     #   yarl
+ multiprocess==0.70.16
+     # via datasets
+ networkx==3.4.2
+     # via torch
+ numpy==2.2.3
+     # via
+     #   accelerate
+     #   datasets
+     #   gradio
+     #   pandas
+     #   transformers
+ nvidia-cublas-cu12==12.1.3.1
+     # via
+     #   nvidia-cudnn-cu12
+     #   nvidia-cusolver-cu12
+     #   torch
+ nvidia-cuda-cupti-cu12==12.1.105
+     # via torch
+ nvidia-cuda-nvrtc-cu12==12.1.105
+     # via torch
+ nvidia-cuda-runtime-cu12==12.1.105
+     # via torch
+ nvidia-cudnn-cu12==9.1.0.70
+     # via torch
+ nvidia-cufft-cu12==11.0.2.54
+     # via torch
+ nvidia-curand-cu12==10.3.2.106
+     # via torch
+ nvidia-cusolver-cu12==11.4.5.107
+     # via torch
+ nvidia-cusparse-cu12==12.1.0.106
+     # via
+     #   nvidia-cusolver-cu12
+     #   torch
+ nvidia-nccl-cu12==2.20.5
+     # via torch
+ nvidia-nvjitlink-cu12==12.8.61
+     # via
+     #   nvidia-cusolver-cu12
+     #   nvidia-cusparse-cu12
+ nvidia-nvtx-cu12==12.1.105
+     # via torch
+ orjson==3.10.15
+     # via gradio
+ packaging==24.2
+     # via
+     #   accelerate
+     #   datasets
+     #   gradio
+     #   gradio-client
+     #   huggingface-hub
+     #   spaces
+     #   transformers
+ pandas==2.2.3
+     # via
+     #   datasets
+     #   gradio
+ pillow==11.1.0
+     # via gradio
+ propcache==0.2.1
+     # via
+     #   aiohttp
+     #   yarl
+ psutil==5.9.8
+     # via
+     #   accelerate
+     #   spaces
+ pyarrow==19.0.0
+     # via datasets
+ pydantic==2.10.6
+     # via
+     #   fastapi
+     #   gradio
+     #   spaces
+ pydantic-core==2.27.2
+     # via pydantic
+ pydub==0.25.1
+     # via gradio
+ pygments==2.19.1
+     # via rich
+ python-dateutil==2.9.0.post0
+     # via pandas
+ python-multipart==0.0.20
+     # via gradio
+ pytz==2025.1
+     # via pandas
+ pyyaml==6.0.2
+     # via
+     #   accelerate
+     #   datasets
+     #   gradio
+     #   huggingface-hub
+     #   transformers
+ regex==2024.11.6
+     # via transformers
+ requests==2.32.3
+     # via
+     #   datasets
+     #   huggingface-hub
+     #   spaces
+     #   transformers
+ rich==13.9.4
+     # via typer
+ ruff==0.9.6
+     # via gradio
+ safehttpx==0.1.6
+     # via gradio
+ safetensors==0.5.2
+     # via
+     #   accelerate
+     #   transformers
+ semantic-version==2.10.0
+     # via gradio
+ shellingham==1.5.4
+     # via typer
+ six==1.17.0
+     # via python-dateutil
+ sniffio==1.3.1
+     # via anyio
+ spaces==0.32.0
+     # via deepseek-r1-distill-qwen2-5-bakeneko-32b-awq (pyproject.toml)
+ starlette==0.45.3
+     # via
+     #   fastapi
+     #   gradio
+ sympy==1.13.3
+     # via torch
+ tokenizers==0.21.0
+     # via
+     #   autoawq
+     #   transformers
+ tomlkit==0.13.2
+     # via gradio
+ torch==2.4.0
+     # via
+     #   deepseek-r1-distill-qwen2-5-bakeneko-32b-awq (pyproject.toml)
+     #   accelerate
+     #   autoawq
+ tqdm==4.67.1
+     # via
+     #   datasets
+     #   huggingface-hub
+     #   transformers
+ transformers==4.48.3
+     # via
+     #   deepseek-r1-distill-qwen2-5-bakeneko-32b-awq (pyproject.toml)
+     #   autoawq
+ triton==3.0.0
+     # via
+     #   autoawq
+     #   torch
+ typer==0.15.1
+     # via gradio
+ typing-extensions==4.12.2
+     # via
+     #   anyio
+     #   autoawq
+     #   fastapi
+     #   gradio
+     #   gradio-client
+     #   huggingface-hub
+     #   multidict
+     #   pydantic
+     #   pydantic-core
+     #   rich
+     #   spaces
+     #   torch
+     #   typer
+     #   uvicorn
+ tzdata==2025.1
+     # via pandas
+ urllib3==2.3.0
+     # via requests
+ uvicorn==0.34.0
+     # via gradio
+ websockets==14.2
+     # via gradio-client
+ xxhash==3.5.0
+     # via datasets
+ yarl==1.18.3
+     # via aiohttp
+ zstandard==0.23.0
+     # via autoawq
style.css ADDED
@@ -0,0 +1,4 @@
+ h1 {
+   text-align: center;
+   display: block;
+ }
uv.lock ADDED
The diff for this file is too large to render.