mtasic85 committed on
Commit
91018ec
·
1 Parent(s): e2bef55

pretrain core 0

Browse files
Files changed (1) hide show
  1. scripts/pretrain-core-model-0.yaml +3 -3
scripts/pretrain-core-model-0.yaml CHANGED
@@ -64,7 +64,7 @@ train:
64
  # global_batch_size: 64
65
 
66
  # Number of samples per data-parallel rank (type: int, default: 4)
67
- micro_batch_size: 6
68
  # micro_batch_size: 4
69
  # micro_batch_size: 2
70
  # micro_batch_size: 1
@@ -113,8 +113,8 @@ eval:
113
  # Optimizer-related arguments
114
 
115
  optimizer:
116
- # class_path: torch.optim.AdamW
117
- class_path: torchao.prototype.low_bit_optim.AdamW8bit
118
  # class_path: torchao.prototype.low_bit_optim.AdamW4bit
119
  # class_path: bitsandbytes.optim.AdamW8bit
120
  # class_path: bitsandbytes.optim.PagedAdamW8bit
 
64
  # global_batch_size: 64
65
 
66
  # Number of samples per data-parallel rank (type: int, default: 4)
67
+ micro_batch_size: 5
68
  # micro_batch_size: 4
69
  # micro_batch_size: 2
70
  # micro_batch_size: 1
 
113
  # Optimizer-related arguments
114
 
115
  optimizer:
116
+ class_path: torch.optim.AdamW
117
+ # class_path: torchao.prototype.low_bit_optim.AdamW8bit
118
  # class_path: torchao.prototype.low_bit_optim.AdamW4bit
119
  # class_path: bitsandbytes.optim.AdamW8bit
120
  # class_path: bitsandbytes.optim.PagedAdamW8bit