Update README.md
Browse files
README.md
CHANGED
@@ -50,6 +50,11 @@ The model has 20b parameters (3 experts, 8b each, 8b active parameters duri
|
|
50 |
```bash
|
51 |
pip install transformers -U
|
52 |
```
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
```python
|
55 |
import torch
|
@@ -115,7 +120,7 @@ The image of ants climbing over a vertical surface highlights their ability to a
|
|
115 |
|
116 |
## Make an Idefics-2-MoE model from scratch using several pre-trained models
|
117 |
|
118 |
-
Download .py files that implement the
|
119 |
|
120 |
```bash
|
121 |
pip install huggingface_hub
|
@@ -168,10 +173,10 @@ DEVICE='cuda'
|
|
168 |
|
169 |
model_id_1='lamm-mit/Cephalo-Idefics-2-vision-8b-beta'
|
170 |
|
171 |
-
model_1 = Idefics2ForConditionalGeneration.from_pretrained(
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
)
|
176 |
processor = AutoProcessor.from_pretrained(
|
177 |
f"{model_id_1}",
|
@@ -188,18 +193,18 @@ Now, load the rest of the models:
|
|
188 |
```python
|
189 |
model_id_2='HuggingFaceM4/idefics2-8b-chatty'
|
190 |
|
191 |
-
model_2 = Idefics2ForConditionalGeneration.from_pretrained(
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
)
|
196 |
|
197 |
model_id_3='HuggingFaceM4/idefics2-8b'
|
198 |
|
199 |
-
model_3 = Idefics2ForConditionalGeneration.from_pretrained(
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
)
|
204 |
```
|
205 |
Put on device:
|
|
|
50 |
```bash
|
51 |
pip install transformers -U
|
52 |
```
|
53 |
+
Install FlashAttention-2
|
54 |
+
|
55 |
+
```bash
|
56 |
+
pip install flash-attn --no-build-isolation
|
57 |
+
```
|
58 |
|
59 |
```python
|
60 |
import torch
|
|
|
120 |
|
121 |
## Make an Idefics-2-MoE model from scratch using several pre-trained models
|
122 |
|
123 |
+
Download .py files that implement the Idefics-2 Mixture-of-Expert Vision model:
|
124 |
|
125 |
```bash
|
126 |
pip install huggingface_hub
|
|
|
173 |
|
174 |
model_id_1='lamm-mit/Cephalo-Idefics-2-vision-8b-beta'
|
175 |
|
176 |
+
model_1 = Idefics2ForConditionalGeneration.from_pretrained( model_id_1,
|
177 |
+
torch_dtype=torch.bfloat16, #if your GPU allows
|
178 |
+
_attn_implementation="flash_attention_2", #make sure Flash Attention 2 is installed
|
179 |
+
trust_remote_code=True,
|
180 |
)
|
181 |
processor = AutoProcessor.from_pretrained(
|
182 |
f"{model_id_1}",
|
|
|
193 |
```python
|
194 |
model_id_2='HuggingFaceM4/idefics2-8b-chatty'
|
195 |
|
196 |
+
model_2 = Idefics2ForConditionalGeneration.from_pretrained( model_id_2,
|
197 |
+
torch_dtype=torch.bfloat16, #if your GPU allows
|
198 |
+
_attn_implementation="flash_attention_2", #make sure Flash Attention 2 is installed
|
199 |
+
trust_remote_code=True,
|
200 |
)
|
201 |
|
202 |
model_id_3='HuggingFaceM4/idefics2-8b'
|
203 |
|
204 |
+
model_3 = Idefics2ForConditionalGeneration.from_pretrained( model_id_3,
|
205 |
+
torch_dtype=torch.bfloat16, #if your GPU allows
|
206 |
+
_attn_implementation="flash_attention_2", #make sure Flash Attention 2 is installed
|
207 |
+
trust_remote_code=True,
|
208 |
)
|
209 |
```
|
210 |
Put on device:
|