final final final typos (#72)
- adding pdf (835ee81a735ef894a6a4417ae7fbbd3d6de568a9)
- push (dd1a9ee9858481499fc601b99ab7221b6a0efcd8)
Files changed:
- .gitattributes +1 -0
- The_Ultra-Scale_Playbook_Training_LLMs_on_GPU_Clusters.pdf +3 -0
- assets/images/256px-PDF.png +3 -0
- dist/assets/images/256px-PDF.png +3 -0
- dist/distill.bundle.js +1 -1
- dist/distill.bundle.js.map +0 -0
- dist/index.html +3 -2
- src/distill.js +6 -0
- src/index.html +3 -2
.gitattributes
CHANGED
@@ -18,6 +18,7 @@
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.png filter=lfs diff=lfs merge=lfs -text
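
The added *.pdf rule is what git lfs track "*.pdf" appends to .gitattributes, so the playbook PDF added below is committed as a small LFS pointer rather than a ~14 MB blob in git history. A minimal Node sketch of the same idea (trackWithLfs is a hypothetical helper for illustration, not part of this repo):

// add_lfs_rule.js - illustrative sketch; `git lfs track "*.pdf"` performs the same edit.
const fs = require("fs");

// Append a Git LFS tracking rule to .gitattributes unless an identical rule already exists.
function trackWithLfs(pattern, file = ".gitattributes") {
  const rule = `${pattern} filter=lfs diff=lfs merge=lfs -text`;
  const current = fs.existsSync(file) ? fs.readFileSync(file, "utf8") : "";
  if (current.split("\n").includes(rule)) return false; // rule already present
  const sep = current === "" || current.endsWith("\n") ? "" : "\n";
  fs.appendFileSync(file, sep + rule + "\n");
  return true;
}

trackWithLfs("*.pdf"); // writes: *.pdf filter=lfs diff=lfs merge=lfs -text
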
The_Ultra-Scale_Playbook_Training_LLMs_on_GPU_Clusters.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:274a19a2577ed220cd3a102b4469c44310e4a7c8e8f8ebc36842d907cb51e127
+size 14059172
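
What gets committed is not the PDF itself but a standard Git LFS pointer: the spec version, the SHA-256 of the real file, and its size in bytes (about 14 MB). A minimal Node sketch that reads a pointer of this shape (illustrative only; it assumes the file on disk is still the pointer text, i.e. LFS smudging is disabled):

// parse_lfs_pointer.js - illustrative sketch of the pointer format shown above.
const fs = require("fs");

function parseLfsPointer(path) {
  const fields = {};
  for (const line of fs.readFileSync(path, "utf8").trim().split("\n")) {
    const cut = line.indexOf(" ");
    fields[line.slice(0, cut)] = line.slice(cut + 1); // e.g. "oid" -> "sha256:274a19a2..."
  }
  if (!fields.version || !fields.oid || !fields.size) throw new Error("not a Git LFS pointer");
  return { version: fields.version, oid: fields.oid, size: Number(fields.size) };
}

console.log(parseLfsPointer("The_Ultra-Scale_Playbook_Training_LLMs_on_GPU_Clusters.pdf"));
// -> { version: 'https://git-lfs.github.com/spec/v1', oid: 'sha256:274a19a2...', size: 14059172 }
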
assets/images/256px-PDF.png
ADDED
Binary image (PDF download icon), stored via Git LFS.
dist/assets/images/256px-PDF.png
ADDED
Binary image (PDF download icon for the built site), stored via Git LFS.
dist/distill.bundle.js
CHANGED
@@ -2146,7 +2146,7 @@ function _arrayWithHoles(r) { if (Array.isArray(r)) return r; }
 function bylineTemplate(frontMatter) {
 return "\n <div class=\"byline grid\">\n <div>\n <h3>Authors</h3>\n <div>\n ".concat(frontMatter.authors.map(function (author, i) {
 return "\n <span class=\"author\">\n ".concat(author.personalURL ? "\n <a class=\"name\" href=\"".concat(author.personalURL, "\">").concat(author.name) + (i + 1 < frontMatter.authors.length ? "," : "") + "</a>" : "\n <span class=\"name\">".concat(author.name) + (i + 1 < frontMatter.authors.length ? "," : "") + "</span>", "\n </span>\n ");
-}).join(''), "\n </div>\n </div>\n <div >\n <h3>Affiliation</h3>\n <div><a href=\"https://huggingface.co/\">Hugging Face</a>\n </div>\n </div>\n <div >\n <h3>Published</h3>\n <div>Feb 19, 2025</div>\n </div>\n </div>\n");
+}).join(''), "\n </div>\n </div>\n <div >\n <h3>Affiliation</h3>\n <div><a href=\"https://huggingface.co/\">Hugging Face</a>\n </div>\n </div>\n <div >\n <h3>Published</h3>\n <div>Feb 19, 2025</div>\n </div>\n </div>\n <div class=\"side pdf-download\">\n <a href=\"https://huggingface.co/spaces/nanotron/ultrascale-playbook/resolve/main/The_Ultra-Scale_Playbook_Training_LLMs_on_GPU_Clusters.pdf\">Download PDF\n <br>\n <img style=\"width: 32px;\" src=\"../assets/images/256px-PDF.png\" alt=\"PDF\"></a>\n \n </div>\n");
 }
 var Byline = /*#__PURE__*/function (_HTMLElement4) {
 function Byline() {
dist/distill.bundle.js.map
CHANGED
The diff for this file is too large to render. See raw diff.
dist/index.html
CHANGED
@@ -75,7 +75,7 @@
 <p>
 Thousands of GPUs humming in perfect harmony. That's what it takes to train today's most powerful AI models – a symphony of computing power that until recently was the exclusive domain of elite research labs. Open source has transformed this landscape, but not completely. Yes, you can download the latest <a href="https://huggingface.co/meta-llama">Llama</a> or <a href="https://huggingface.co/deepseek-ai">DeepSeek</a> models. Yes, you can read their <a href="https://ai.meta.com/research/publications/the-llama-3-herd-of-models/">technical</a> and <a href="https://github.com/deepseek-ai/DeepSeek-R1/blob/main/DeepSeek_R1.pdf">experiment</a> reports. But the most challenging part – the training code, the knowledge and technics necessary to coordinate GPUs to train these massive systems – remains shrouded in complexity and spread around a series of disconnected papers and often private codebases.
 </p>
-<aside>Reading time: 2-4 days. For the best reading experience, we recommend not using a mobile phone.</aside>
+<aside>Reading time: 2-4 days. <br>For the best reading experience, we recommend not using a mobile phone.</aside>
 <p>
 This open-source book is here to changes that. Starting from the basics, we'll walk you through the knowledge necessary to scale the training of large language models from one GPU to tens, hundreds and even thousands of GPUs, illustrating theory with practical code examples and reproducible benchmarks.
 </p>
@@ -3829,7 +3829,8 @@
 }
 if (level === 0)
 ToC += '<div>' + link + '</div>';
-else
+else
+// else if (level === 1)
 ToC += '<li>' + link + '</li>';
 }

src/distill.js
CHANGED
@@ -2105,6 +2105,12 @@ d-appendix > distill-appendix {
 <div>Feb 19, 2025</div>
 </div>
 </div>
+<div class="side pdf-download">
+<a href="https://huggingface.co/spaces/nanotron/ultrascale-playbook/resolve/main/The_Ultra-Scale_Playbook_Training_LLMs_on_GPU_Clusters.pdf">Download PDF
+<br>
+<img style="width: 32px;" src="../assets/images/256px-PDF.png" alt="PDF"></a>
+
+</div>
 `;
 }

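
These six added lines extend the byline template string in src/distill.js with a download box; the dist/distill.bundle.js change earlier in this commit is the compiled form of the same edit. A standalone sketch of just the new fragment (pdfDownloadBox, pdfUrl and iconPath are hypothetical names; in the real template the URL and icon path are hard-coded exactly as in the diff):

// byline_pdf_link.js - sketch of the fragment appended to bylineTemplate in src/distill.js.
function pdfDownloadBox(pdfUrl, iconPath) {
  // A small "Download PDF" box that the byline grid places in its side column.
  return `
  <div class="side pdf-download">
    <a href="${pdfUrl}">Download PDF
    <br>
    <img style="width: 32px;" src="${iconPath}" alt="PDF"></a>
  </div>`;
}

console.log(pdfDownloadBox(
  "https://huggingface.co/spaces/nanotron/ultrascale-playbook/resolve/main/The_Ultra-Scale_Playbook_Training_LLMs_on_GPU_Clusters.pdf",
  "../assets/images/256px-PDF.png"
));
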
src/index.html
CHANGED
@@ -75,7 +75,7 @@
 <p>
 Thousands of GPUs humming in perfect harmony. That's what it takes to train today's most powerful AI models – a symphony of computing power that until recently was the exclusive domain of elite research labs. Open source has transformed this landscape, but not completely. Yes, you can download the latest <a href="https://huggingface.co/meta-llama">Llama</a> or <a href="https://huggingface.co/deepseek-ai">DeepSeek</a> models. Yes, you can read their <a href="https://ai.meta.com/research/publications/the-llama-3-herd-of-models/">technical</a> and <a href="https://github.com/deepseek-ai/DeepSeek-R1/blob/main/DeepSeek_R1.pdf">experiment</a> reports. But the most challenging part – the training code, the knowledge and technics necessary to coordinate GPUs to train these massive systems – remains shrouded in complexity and spread around a series of disconnected papers and often private codebases.
 </p>
-<aside>Reading time: 2-4 days. For the best reading experience, we recommend not using a mobile phone.</aside>
+<aside>Reading time: 2-4 days. <br>For the best reading experience, we recommend not using a mobile phone.</aside>
 <p>
 This open-source book is here to changes that. Starting from the basics, we'll walk you through the knowledge necessary to scale the training of large language models from one GPU to tens, hundreds and even thousands of GPUs, illustrating theory with practical code examples and reproducible benchmarks.
 </p>
@@ -3829,7 +3829,8 @@
 }
 if (level === 0)
 ToC += '<div>' + link + '</div>';
-else
+else
+// else if (level === 1)
 ToC += '<li>' + link + '</li>';
 }

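
Both copies of index.html receive the same two edits: a <br> inside the reading-time aside and, in the inline table-of-contents script around lines 3830-3834, an else branch carrying a commented-out else if (level === 1) alternative. A minimal sketch of that branch logic (buildToC, headings and the link construction are hypothetical; only the if/else shape mirrors the diff):

// toc_sketch.js - illustrative reconstruction of the ToC branch touched by this commit.
function buildToC(headings) {
  // headings: [{ level: 0 | 1 | 2, text: "...", id: "..." }, ...]
  let ToC = "";
  for (const h of headings) {
    const link = `<a href="#${h.id}">${h.text}</a>`;
    if (h.level === 0)
      ToC += "<div>" + link + "</div>"; // top-level entries get their own block
    else
      // else if (level === 1)          <- branch left commented out in the patch
      ToC += "<li>" + link + "</li>";   // deeper headings become list items
  }
  return ToC;
}

console.log(buildToC([
  { level: 0, text: "High-Level Overview", id: "overview" },
  { level: 1, text: "Data Parallelism", id: "data-parallelism" },
]));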