Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Sparse Autoencoder Trained on "NeelNanda/c4-code-20k" Data
|
2 |
+
|
3 |
+
HookPoint: blocks.0.hook_mlp_out_98304:v0
|
4 |
+
Layer: 0
|
5 |
+
|
6 |
+
Library: SAE_Lens
|
7 |
+
|
8 |
+
Config:
|
9 |
+
|
10 |
+
act_store_device:"cuda"
|
11 |
+
activation_fn:"relu"
|
12 |
+
activation_fn_kwargs:
|
13 |
+
adam_beta1:0.9
|
14 |
+
adam_beta2:0.999
|
15 |
+
apply_b_dec_to_input:false
|
16 |
+
architecture:"standard"
|
17 |
+
autocast:false
|
18 |
+
autocast_lm:false
|
19 |
+
b_dec_init_method:"zeros"
|
20 |
+
cached_activations_path:null
|
21 |
+
checkpoint_path:"checkpoints/fsu6y37f"
|
22 |
+
compile_llm:false
|
23 |
+
compile_sae:false
|
24 |
+
context_size:512
|
25 |
+
d_in:3,072
|
26 |
+
d_sae:98,304
|
27 |
+
dataset_path:"NeelNanda/c4-code-20k"
|
28 |
+
dataset_trust_remote_code:true
|
29 |
+
dead_feature_threshold:0.0001
|
30 |
+
dead_feature_window:1,000
|
31 |
+
decoder_heuristic_init:true
|
32 |
+
decoder_orthogonal_init:false
|
33 |
+
device:"cuda"
|
34 |
+
dtype:"float32"
|
35 |
+
eval_batch_size_prompts:null
|
36 |
+
eval_every_n_wandb_logs:20
|
37 |
+
expansion_factor:32
|
38 |
+
feature_sampling_window:2,000
|
39 |
+
finetuning_method:null
|
40 |
+
finetuning_tokens:0
|
41 |
+
from_pretrained_path:null
|
42 |
+
hook_eval:"NOT_IN_USE"
|
43 |
+
hook_head_index:null
|
44 |
+
hook_layer:0
|
45 |
+
hook_name:"blocks.0.hook_mlp_out"
|
46 |
+
init_encoder_as_decoder_transpose:true
|
47 |
+
is_dataset_tokenized:false
|
48 |
+
l1_coefficient:8
|
49 |
+
l1_warm_up_steps:6,000
|
50 |
+
llm_compilation_mode:null
|
51 |
+
log_activations_store_to_wandb:false
|
52 |
+
log_optimizer_state_to_wandb:false
|
53 |
+
log_to_wandb:true
|
54 |
+
lp_norm:1
|
55 |
+
lr:0.00005
|
56 |
+
lr_decay_steps:24,000
|
57 |
+
lr_end:0.000005
|
58 |
+
lr_scheduler_name:"constant"
|
59 |
+
lr_warm_up_steps:0
|
60 |
+
model_class_name:"HookedTransformer"
|
61 |
+
model_from_pretrained_kwargs:
model_kwargs:
|
62 |
+
model_name:"microsoft/Phi-3-mini-4k-instruct"
|
63 |
+
mse_loss_normalization:null
|
64 |
+
n_batches_in_buffer:64
|
65 |
+
n_checkpoints:0
|
66 |
+
n_eval_batches:10
|
67 |
+
n_restart_cycles:1
|
68 |
+
noise_scale:0
|
69 |
+
normalize_activations:"expected_average_only_in"
|
70 |
+
normalize_sae_decoder:false
|
71 |
+
prepend_bos:true
|
72 |
+
resume:false
|
73 |
+
run_name:"98304-L1-8-LR-5e-05-Tokens-2.458e+08"
|
74 |
+
sae_compilation_mode:null
|
75 |
+
sae_lens_training_version:"3.14.0"
|
76 |
+
sae_lens_version:"3.14.0"
|
77 |
+
scale_sparsity_penalty_by_decoder_norm:true
|
78 |
+
seed:42
|
79 |
+
store_batch_size_prompts:16
|
80 |
+
streaming:true
|
81 |
+
tokens_per_buffer:67,108,864
|
82 |
+
train_batch_size_tokens:2,048
|
83 |
+
training_tokens:245,760,000
|
84 |
+
use_cached_activations:false
|
85 |
+
use_ghost_grads:false
|
86 |
+
verbose:true
|
87 |
+
wandb_entity:null
|
88 |
+
wandb_id:null
|
89 |
+
wandb_log_frequency:30
|
90 |
+
wandb_project:"phi3-mini-sae"
|
91 |
+
|
92 |
+
|
93 |
+
Results:
|
94 |
+
details/current_l1_coefficient:8
|
95 |
+
details/current_learning_rate:0
|
96 |
+
details/n_training_tokens:245,760,000
|
97 |
+
losses/auxiliary_reconstruction_loss:0
|
98 |
+
losses/ghost_grad_loss:0
|
99 |
+
losses/l1_loss:46.398345947265625
|
100 |
+
losses/mse_loss:50.83685302734375
|
101 |
+
losses/overall_loss:422.0236206054687
|
102 |
+
metrics/ce_loss_score:0.993279762405592
|
103 |
+
metrics/ce_loss_with_ablation:1.3571408987045288
|
104 |
+
metrics/ce_loss_with_sae:1.1727865934371948
|
105 |
+
metrics/ce_loss_without_sae:1.171539306640625
|
106 |
+
metrics/explained_variance:0.9660131335258484
|
107 |
+
metrics/explained_variance_std:0.02445772849023342
|
108 |
+
metrics/l0:19.4072265625
|
109 |
+
metrics/l2_norm_in:54.58721160888672
|
110 |
+
metrics/l2_norm_out:1.9406952857971191
|
111 |
+
metrics/l2_ratio:0.03532848507165909
|
112 |
+
metrics/mean_log10_feature_sparsity:-7.757194519042969
|
113 |
+
|
114 |
+
Link to wandb Report: https://api.wandb.ai/links/kdt/jhxn4aup
|