metacritical committed on
Commit
2ca9a47
·
verified ·
1 Parent(s): 1cb8f7f

Updated.

Files changed (1) [hide] [show]
  1. index.html +258 -20
index.html CHANGED
@@ -2,24 +2,18 @@
2
  <html>
3
  <head>
4
  <meta charset="utf-8">
5
- <meta name="description"
6
- content="A collection of research papers from DeepSeek.">
7
  <meta name="keywords" content="DeepSeek, AI Research, Machine Learning">
8
  <meta name="viewport" content="width=device-width, initial-scale=1">
9
  <title>DeepSeek Research Papers</title>
10
-
11
- <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
12
- rel="stylesheet">
13
-
14
  <link rel="stylesheet" href="./static/css/bulma.min.css">
15
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
16
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
17
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
18
- <link rel="stylesheet"
19
- href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
20
  <link rel="stylesheet" href="./static/css/index.css">
21
  <link rel="icon" href="./static/images/favicon.svg">
22
-
23
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
24
  <script defer src="./static/js/fontawesome.all.min.js"></script>
25
  <script src="./static/js/bulma-carousel.min.js"></script>
@@ -27,7 +21,6 @@
27
  <script src="./static/js/index.js"></script>
28
  </head>
29
  <body>
30
-
31
  <section class="hero">
32
  <div class="hero-body">
33
  <div class="container is-max-desktop">
@@ -42,7 +35,6 @@
42
  </div>
43
  </div>
44
  </section>
45
-
46
  <section class="hero teaser">
47
  <div class="container is-max-desktop">
48
  <div class="hero-body">
@@ -52,7 +44,6 @@
52
  </div>
53
  </div>
54
  </section>
55
-
56
  <section class="section">
57
  <div class="container is-max-desktop">
58
  <div class="columns is-centered">
@@ -78,7 +69,6 @@
78
  <span class="is-size-6 has-text-grey">{Jan 6, 2024}</span>
79
  </div>
80
  </div>
81
-
82
  <!-- Paper 2 -->
83
  <div class="paper-item box">
84
  <h3 class="title is-4">2. DeepSeekMoE: Towards Ultimate Expert Specialization in Mixture-of-Experts Language Models</h3>
@@ -96,18 +86,267 @@
96
  <span class="is-size-6 has-text-grey">{Jan 11, 2024}</span>
97
  </div>
98
  </div>
99
-
100
- <!-- Remaining papers follow the same structure -->
101
- <!-- Paper 3 to 17 would be added here with the same pattern -->
102
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  </div>
104
  <!-- Paper List End -->
105
-
106
  </div>
107
  </div>
108
  </div>
109
  </section>
110
-
111
  <footer class="footer">
112
  <div class="container">
113
  <div class="content has-text-centered">
@@ -120,6 +359,5 @@
120
  </div>
121
  </div>
122
  </footer>
123
-
124
  </body>
125
  </html>
 
2
  <html>
3
  <head>
4
  <meta charset="utf-8">
5
+ <meta name="description" content="A collection of research papers from DeepSeek.">
 
6
  <meta name="keywords" content="DeepSeek, AI Research, Machine Learning">
7
  <meta name="viewport" content="width=device-width, initial-scale=1">
8
  <title>DeepSeek Research Papers</title>
9
+ <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
 
 
 
10
  <link rel="stylesheet" href="./static/css/bulma.min.css">
11
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
12
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
13
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
14
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
 
15
  <link rel="stylesheet" href="./static/css/index.css">
16
  <link rel="icon" href="./static/images/favicon.svg">
 
17
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
18
  <script defer src="./static/js/fontawesome.all.min.js"></script>
19
  <script src="./static/js/bulma-carousel.min.js"></script>
 
21
  <script src="./static/js/index.js"></script>
22
  </head>
23
  <body>
 
24
  <section class="hero">
25
  <div class="hero-body">
26
  <div class="container is-max-desktop">
 
35
  </div>
36
  </div>
37
  </section>
 
38
  <section class="hero teaser">
39
  <div class="container is-max-desktop">
40
  <div class="hero-body">
 
44
  </div>
45
  </div>
46
  </section>
 
47
  <section class="section">
48
  <div class="container is-max-desktop">
49
  <div class="columns is-centered">
 
69
  <span class="is-size-6 has-text-grey">{Jan 6, 2024}</span>
70
  </div>
71
  </div>
 
72
  <!-- Paper 2 -->
73
  <div class="paper-item box">
74
  <h3 class="title is-4">2. DeepSeekMoE: Towards Ultimate Expert Specialization in Mixture-of-Experts Language Models</h3>
 
86
  <span class="is-size-6 has-text-grey">{Jan 11, 2024}</span>
87
  </div>
88
  </div>
89
+ <!-- Paper 3 -->
90
+ <div class="paper-item box">
91
+ <h3 class="title is-4">3. DeepSeek-Coder: When the Large Language Model Meets Programming -- The Rise of Code Intelligence</h3>
92
+ <p>Investigating the intersection of large language models and programming.</p>
93
+ <div class="publication-links">
94
+ <span class="link-block">
95
+ <a href="https://arxiv.org/abs/2401.14196" target="_blank"
96
+ class="external-link button is-normal is-rounded is-dark">
97
+ <span class="icon">
98
+ <i class="fas fa-file-pdf"></i>
99
+ </span>
100
+ <span>Paper</span>
101
+ </a>
102
+ </span>
103
+ <span class="is-size-6 has-text-grey">{Jan 25, 2024}</span>
104
+ </div>
105
+ </div>
106
+ <!-- Paper 4 -->
107
+ <div class="paper-item box">
108
+ <h3 class="title is-4">4. DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models</h3>
109
+ <p>Advancing mathematical reasoning capabilities in open language models.</p>
110
+ <div class="publication-links">
111
+ <span class="link-block">
112
+ <a href="https://arxiv.org/abs/2402.03300" target="_blank"
113
+ class="external-link button is-normal is-rounded is-dark">
114
+ <span class="icon">
115
+ <i class="fas fa-file-pdf"></i>
116
+ </span>
117
+ <span>Paper</span>
118
+ </a>
119
+ </span>
120
+ <span class="is-size-6 has-text-grey">{Feb 6, 2024}</span>
121
+ </div>
122
+ </div>
123
+ <!-- Paper 5 -->
124
+ <div class="paper-item box">
125
+ <h3 class="title is-4">5. DeepSeek-VL: Towards Real-World Vision-Language Understanding</h3>
126
+ <p>Focusing on real-world vision-language understanding.</p>
127
+ <div class="publication-links">
128
+ <span class="link-block">
129
+ <a href="https://arxiv.org/abs/2403.05525" target="_blank"
130
+ class="external-link button is-normal is-rounded is-dark">
131
+ <span class="icon">
132
+ <i class="fas fa-file-pdf"></i>
133
+ </span>
134
+ <span>Paper</span>
135
+ </a>
136
+ </span>
137
+ <span class="is-size-6 has-text-grey">{Mar 9, 2024}</span>
138
+ </div>
139
+ </div>
140
+ <!-- Paper 6 -->
141
+ <div class="paper-item box">
142
+ <h3 class="title is-4">6. DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model</h3>
143
+ <p>Developing a strong, economical, and efficient Mixture-of-Experts language model.</p>
144
+ <div class="publication-links">
145
+ <span class="link-block">
146
+ <a href="https://arxiv.org/abs/2405.04434" target="_blank"
147
+ class="external-link button is-normal is-rounded is-dark">
148
+ <span class="icon">
149
+ <i class="fas fa-file-pdf"></i>
150
+ </span>
151
+ <span>Paper</span>
152
+ </a>
153
+ </span>
154
+ <span class="is-size-6 has-text-grey">{May 7, 2024}</span>
155
+ </div>
156
+ </div>
157
+ <!-- Paper 7 -->
158
+ <div class="paper-item box">
159
+ <h3 class="title is-4">7. DeepSeek-Prover: Advancing Theorem Proving in LLMs through Large-Scale Synthetic Data</h3>
160
+ <p>Using large-scale synthetic data to advance theorem proving in LLMs.</p>
161
+ <div class="publication-links">
162
+ <span class="link-block">
163
+ <a href="https://arxiv.org/abs/2405.14333" target="_blank"
164
+ class="external-link button is-normal is-rounded is-dark">
165
+ <span class="icon">
166
+ <i class="fas fa-file-pdf"></i>
167
+ </span>
168
+ <span>Paper</span>
169
+ </a>
170
+ </span>
171
+ <span class="is-size-6 has-text-grey">{May 23, 2024}</span>
172
+ </div>
173
+ </div>
174
+ <!-- Paper 8 -->
175
+ <div class="paper-item box">
176
+ <h3 class="title is-4">8. DeepSeek-Coder-V2: Breaking the Barrier of Closed-Source Models in Code Intelligence</h3>
177
+ <p>Aiming to surpass closed-source models in code intelligence.</p>
178
+ <div class="publication-links">
179
+ <span class="link-block">
180
+ <a href="https://arxiv.org/abs/2406.11931" target="_blank"
181
+ class="external-link button is-normal is-rounded is-dark">
182
+ <span class="icon">
183
+ <i class="fas fa-file-pdf"></i>
184
+ </span>
185
+ <span>Paper</span>
186
+ </a>
187
+ </span>
188
+ <span class="is-size-6 has-text-grey">{Jun 17, 2024}</span>
189
+ </div>
190
+ </div>
191
+ <!-- Paper 9 -->
192
+ <div class="paper-item box">
193
+ <h3 class="title is-4">9. Let the Expert Stick to His Last: Expert-Specialized Fine-Tuning for Sparse Architectural Large Language Models</h3>
194
+ <p>Fine-tuning sparse architectural large language models with expert specialization.</p>
195
+ <div class="publication-links">
196
+ <span class="link-block">
197
+ <a href="https://arxiv.org/abs/2407.01906" target="_blank"
198
+ class="external-link button is-normal is-rounded is-dark">
199
+ <span class="icon">
200
+ <i class="fas fa-file-pdf"></i>
201
+ </span>
202
+ <span>Paper</span>
203
+ </a>
204
+ </span>
205
+ <span class="is-size-6 has-text-grey">{Jul 2, 2024}</span>
206
+ </div>
207
+ </div>
208
+ <!-- Paper 10 -->
209
+ <div class="paper-item box">
210
+ <h3 class="title is-4">10. DeepSeek-Prover-V1.5: Harnessing Proof Assistant Feedback for Reinforcement Learning and Monte-Carlo Tree Search</h3>
211
+ <p>Utilizing proof assistant feedback for reinforcement learning and Monte-Carlo Tree Search.</p>
212
+ <div class="publication-links">
213
+ <span class="link-block">
214
+ <a href="https://arxiv.org/abs/2408.08152" target="_blank"
215
+ class="external-link button is-normal is-rounded is-dark">
216
+ <span class="icon">
217
+ <i class="fas fa-file-pdf"></i>
218
+ </span>
219
+ <span>Paper</span>
220
+ </a>
221
+ </span>
222
+ <span class="is-size-6 has-text-grey">{Aug 15, 2024}</span>
223
+ </div>
224
+ </div>
225
+ <!-- Paper 11 -->
226
+ <div class="paper-item box">
227
+ <h3 class="title is-4">11. Janus: Decoupling Visual Encoding for Unified Multimodal Understanding and Generation</h3>
228
+ <p>Decoupling visual encoding for unified multimodal understanding and generation.</p>
229
+ <div class="publication-links">
230
+ <span class="link-block">
231
+ <a href="https://arxiv.org/abs/2410.13848" target="_blank"
232
+ class="external-link button is-normal is-rounded is-dark">
233
+ <span class="icon">
234
+ <i class="fas fa-file-pdf"></i>
235
+ </span>
236
+ <span>Paper</span>
237
+ </a>
238
+ </span>
239
+ <span class="is-size-6 has-text-grey">{Oct 17, 2024}</span>
240
+ </div>
241
+ </div>
242
+ <!-- Paper 12 -->
243
+ <div class="paper-item box">
244
+ <h3 class="title is-4">12. JanusFlow: Harmonizing Autoregression and Rectified Flow for Unified Multimodal Understanding and Generation</h3>
245
+ <p>Harmonizing autoregression and rectified flow for unified multimodal understanding and generation.</p>
246
+ <div class="publication-links">
247
+ <span class="link-block">
248
+ <a href="https://arxiv.org/abs/2411.07975" target="_blank"
249
+ class="external-link button is-normal is-rounded is-dark">
250
+ <span class="icon">
251
+ <i class="fas fa-file-pdf"></i>
252
+ </span>
253
+ <span>Paper</span>
254
+ </a>
255
+ </span>
256
+ <span class="is-size-6 has-text-grey">{Nov 12, 2024}</span>
257
+ </div>
258
+ </div>
259
+ <!-- Paper 13 -->
260
+ <div class="paper-item box">
261
+ <h3 class="title is-4">13. DeepSeek-VL2: Mixture-of-Experts Vision-Language Models for Advanced Multimodal Understanding</h3>
262
+ <p>Mixture-of-Experts Vision-Language Models for advanced multimodal understanding.</p>
263
+ <div class="publication-links">
264
+ <span class="link-block">
265
+ <a href="https://arxiv.org/abs/2412.10302" target="_blank"
266
+ class="external-link button is-normal is-rounded is-dark">
267
+ <span class="icon">
268
+ <i class="fas fa-file-pdf"></i>
269
+ </span>
270
+ <span>Paper</span>
271
+ </a>
272
+ </span>
273
+ <span class="is-size-6 has-text-grey">{Dec 13, 2024}</span>
274
+ </div>
275
+ </div>
276
+ <!-- Paper 14 -->
277
+ <div class="paper-item box">
278
+ <h3 class="title is-4">14. DeepSeek-V3 Technical Report</h3>
279
+ <p>Technical report for DeepSeek-V3.</p>
280
+ <div class="publication-links">
281
+ <span class="link-block">
282
+ <a href="https://arxiv.org/abs/2412.19437" target="_blank"
283
+ class="external-link button is-normal is-rounded is-dark">
284
+ <span class="icon">
285
+ <i class="fas fa-file-pdf"></i>
286
+ </span>
287
+ <span>Paper</span>
288
+ </a>
289
+ </span>
290
+ <span class="is-size-6 has-text-grey">{Dec 27, 2024}</span>
291
+ </div>
292
+ </div>
293
+ <!-- Paper 15 -->
294
+ <div class="paper-item box">
295
+ <h3 class="title is-4">15. DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning</h3>
296
+ <p>Incentivizing Reasoning Capability in LLMs via Reinforcement Learning.</p>
297
+ <div class="publication-links">
298
+ <span class="link-block">
299
+ <a href="https://arxiv.org/abs/2501.12948" target="_blank"
300
+ class="external-link button is-normal is-rounded is-dark">
301
+ <span class="icon">
302
+ <i class="fas fa-file-pdf"></i>
303
+ </span>
304
+ <span>Paper</span>
305
+ </a>
306
+ </span>
307
+ <span class="is-size-6 has-text-grey">{Jan 27, 2025}</span>
308
+ </div>
309
+ </div>
310
+ <!-- Paper 16 -->
311
+ <div class="paper-item box">
312
+ <h3 class="title is-4">16. Janus-Pro: Unified Multimodal Understanding and Generation with Data and Model Scaling</h3>
313
+ <p>Unified Multimodal Understanding and Generation with Data and Model Scaling.</p>
314
+ <div class="publication-links">
315
+ <span class="link-block">
316
+ <a href="https://arxiv.org/abs/2501.17811" target="_blank"
317
+ class="external-link button is-normal is-rounded is-dark">
318
+ <span class="icon">
319
+ <i class="fas fa-file-pdf"></i>
320
+ </span>
321
+ <span>Paper</span>
322
+ </a>
323
+ </span>
324
+ <span class="is-size-6 has-text-grey">{Jan 31, 2025}</span>
325
+ </div>
326
+ </div>
327
+ <!-- Paper 17 -->
328
+ <div class="paper-item box">
329
+ <h3 class="title is-4">17. Native Sparse Attention: Hardware-Aligned and Natively Trainable Sparse Attention</h3>
330
+ <p>Hardware-Aligned and Natively Trainable Sparse Attention.</p>
331
+ <div class="publication-links">
332
+ <span class="link-block">
333
+ <a href="https://arxiv.org/abs/2502.11089" target="_blank"
334
+ class="external-link button is-normal is-rounded is-dark">
335
+ <span class="icon">
336
+ <i class="fas fa-file-pdf"></i>
337
+ </span>
338
+ <span>Paper</span>
339
+ </a>
340
+ </span>
341
+ <span class="is-size-6 has-text-grey">{Feb 16, 2025}</span>
342
+ </div>
343
+ </div>
344
  </div>
345
  <!-- Paper List End -->
 
346
  </div>
347
  </div>
348
  </div>
349
  </section>
 
350
  <footer class="footer">
351
  <div class="container">
352
  <div class="content has-text-centered">
 
359
  </div>
360
  </div>
361
  </footer>
 
362
  </body>
363
  </html>