drsis committed on
Commit
9259cd3
·
verified ·
1 Parent(s): 9ad0fe4

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +114 -0
README.md ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Sparse Autoencoder Trained on "NeelNanda/c4-code-20k" Data
2
+
3
+ HookPoint: blocks.0.hook_mlp_out_98304:v0
4
+ Layer: 0
5
+
6
+ Library: SAE_Lens
7
+
8
+ Config:
9
+
10
+ act_store_device:"cuda"
11
+ activation_fn:"relu"
12
+ activation_fn_kwargs:
13
+ adam_beta1:0.9
14
+ adam_beta2:0.999
15
+ apply_b_dec_to_input:false
16
+ architecture:"standard"
17
+ autocast:false
18
+ autocast_lm:false
19
+ b_dec_init_method:"zeros"
20
+ cached_activations_path:null
21
+ checkpoint_path:"checkpoints/fsu6y37f"
22
+ compile_llm:false
23
+ compile_sae:false
24
+ context_size:512
25
+ d_in:3,072
26
+ d_sae:98,304
27
+ dataset_path:"NeelNanda/c4-code-20k"
28
+ dataset_trust_remote_code:true
29
+ dead_feature_threshold:0.0001
30
+ dead_feature_window:1,000
31
+ decoder_heuristic_init:true
32
+ decoder_orthogonal_init:false
33
+ device:"cuda"
34
+ dtype:"float32"
35
+ eval_batch_size_prompts:null
36
+ eval_every_n_wandb_logs:20
37
+ expansion_factor:32
38
+ feature_sampling_window:2,000
39
+ finetuning_method:null
40
+ finetuning_tokens:0
41
+ from_pretrained_path:null
42
+ hook_eval:"NOT_IN_USE"
43
+ hook_head_index:null
44
+ hook_layer:0
45
+ hook_name:"blocks.0.hook_mlp_out"
46
+ init_encoder_as_decoder_transpose:true
47
+ is_dataset_tokenized:false
48
+ l1_coefficient:8
49
+ l1_warm_up_steps:6,000
50
+ llm_compilation_mode:null
51
+ log_activations_store_to_wandb:false
52
+ log_optimizer_state_to_wandb:false
53
+ log_to_wandb:true
54
+ lp_norm:1
55
+ lr:0.00005
56
+ lr_decay_steps:24,000
57
+ lr_end:0.000005
58
+ lr_scheduler_name:"constant"
59
+ lr_warm_up_steps:0
60
+ model_class_name:"HookedTransformer"
61
+ model_from_pretrained_kwargs:model_kwargs:
62
+ model_name:"microsoft/Phi-3-mini-4k-instruct"
63
+ mse_loss_normalization:null
64
+ n_batches_in_buffer:64
65
+ n_checkpoints:0
66
+ n_eval_batches:10
67
+ n_restart_cycles:1
68
+ noise_scale:0
69
+ normalize_activations:"expected_average_only_in"
70
+ normalize_sae_decoder:false
71
+ prepend_bos:true
72
+ resume:false
73
+ run_name:"98304-L1-8-LR-5e-05-Tokens-2.458e+08"
74
+ sae_compilation_mode:null
75
+ sae_lens_training_version:"3.14.0"
76
+ sae_lens_version:"3.14.0"
77
+ scale_sparsity_penalty_by_decoder_norm:true
78
+ seed:42
79
+ store_batch_size_prompts:16
80
+ streaming:true
81
+ tokens_per_buffer:67,108,864
82
+ train_batch_size_tokens:2,048
83
+ training_tokens:245,760,000
84
+ use_cached_activations:false
85
+ use_ghost_grads:false
86
+ verbose:true
87
+ wandb_entity:null
88
+ wandb_id:null
89
+ wandb_log_frequency:30
90
+ wandb_project:"phi3-mini-sae"
91
+
92
+
93
+ Results:
94
+ details/current_l1_coefficient:8
95
+ details/current_learning_rate:0
96
+ details/n_training_tokens:245,760,000
97
+ losses/auxiliary_reconstruction_loss:0
98
+ losses/ghost_grad_loss:0
99
+ losses/l1_loss:46.398345947265625
100
+ losses/mse_loss:50.83685302734375
101
+ losses/overall_loss:422.0236206054687
102
+ metrics/ce_loss_score:0.993279762405592
103
+ metrics/ce_loss_with_ablation:1.3571408987045288
104
+ metrics/ce_loss_with_sae:1.1727865934371948
105
+ metrics/ce_loss_without_sae:1.171539306640625
106
+ metrics/explained_variance:0.9660131335258484
107
+ metrics/explained_variance_std:0.02445772849023342
108
+ metrics/l0:19.4072265625
109
+ metrics/l2_norm_in:54.58721160888672
110
+ metrics/l2_norm_out:1.9406952857971191
111
+ metrics/l2_ratio:0.03532848507165909
112
+ metrics/mean_log10_feature_sparsity:-7.757194519042969
113
+
114
+ Link to wandb Report: https://api.wandb.ai/links/kdt/jhxn4aup