coai
/

sae_phi-3_mini_4k_ins_blocks_0_hook_mlp_out

Model card Files Files and versions Community

drsis committed on Aug 18, 2024

Commit

9259cd3

·

verified ·

1 Parent(s): 9ad0fe4

Create README.md

Files changed (1) hide show

README.md +114 -0

README.md ADDED Viewed

	@@ -0,0 +1,114 @@

+# Sparse Autoencoder Trained on "NeelNanda/c4-code-20k" Data
+HookPoint: blocks.0.hook_mlp_out_98304:v0
+Layer: 0
+Library: SAE_Lens
+Config:
+act_store_device:"cuda"
+activation_fn:"relu"
+activation_fn_kwargs:
+adam_beta1:0.9
+adam_beta2:0.999
+apply_b_dec_to_input:false
+architecture:"standard"
+autocast:false
+autocast_lm:false
+b_dec_init_method:"zeros"
+cached_activations_path:null
+checkpoint_path:"checkpoints/fsu6y37f"
+compile_llm:false
+compile_sae:false
+context_size:512
+d_in:3,072
+d_sae:98,304
+dataset_path:"NeelNanda/c4-code-20k"
+dataset_trust_remote_code:true
+dead_feature_threshold:0.0001
+dead_feature_window:1,000
+decoder_heuristic_init:true
+decoder_orthogonal_init:false
+device:"cuda"
+dtype:"float32"
+eval_batch_size_prompts:null
+eval_every_n_wandb_logs:20
+expansion_factor:32
+feature_sampling_window:2,000
+finetuning_method:null
+finetuning_tokens:0
+from_pretrained_path:null
+hook_eval:"NOT_IN_USE"
+hook_head_index:null
+hook_layer:0
+hook_name:"blocks.0.hook_mlp_out"
+init_encoder_as_decoder_transpose:true
+is_dataset_tokenized:false
+l1_coefficient:8
+l1_warm_up_steps:6,000
+llm_compilation_mode:null
+log_activations_store_to_wandb:false
+log_optimizer_state_to_wandb:false
+log_to_wandb:true
+lp_norm:1
+lr:0.00005
+lr_decay_steps:24,000
+lr_end:0.000005
+lr_scheduler_name:"constant"
+lr_warm_up_steps:0
+model_class_name:"HookedTransformer"
+model_from_pretrained_kwargs:
+model_kwargs:
+model_name:"microsoft/Phi-3-mini-4k-instruct"
+mse_loss_normalization:null
+n_batches_in_buffer:64
+n_checkpoints:0
+n_eval_batches:10
+n_restart_cycles:1
+noise_scale:0
+normalize_activations:"expected_average_only_in"
+normalize_sae_decoder:false
+prepend_bos:true
+resume:false
+run_name:"98304-L1-8-LR-5e-05-Tokens-2.458e+08"
+sae_compilation_mode:null
+sae_lens_training_version:"3.14.0"
+sae_lens_version:"3.14.0"
+scale_sparsity_penalty_by_decoder_norm:true
+seed:42
+store_batch_size_prompts:16
+streaming:true
+tokens_per_buffer:67,108,864
+train_batch_size_tokens:2,048
+training_tokens:245,760,000
+use_cached_activations:false
+use_ghost_grads:false
+verbose:true
+wandb_entity:null
+wandb_id:null
+wandb_log_frequency:30
+wandb_project:"phi3-mini-sae"
+Results:
+details/current_l1_coefficient:8
+details/current_learning_rate:0
+details/n_training_tokens:245,760,000
+losses/auxiliary_reconstruction_loss:0
+losses/ghost_grad_loss:0
+losses/l1_loss:46.398345947265625
+losses/mse_loss:50.83685302734375
+losses/overall_loss:422.0236206054687
+metrics/ce_loss_score:0.993279762405592
+metrics/ce_loss_with_ablation:1.3571408987045288
+metrics/ce_loss_with_sae:1.1727865934371948
+metrics/ce_loss_without_sae:1.171539306640625
+metrics/explained_variance:0.9660131335258484
+metrics/explained_variance_std:0.02445772849023342
+metrics/l0:19.4072265625
+metrics/l2_norm_in:54.58721160888672
+metrics/l2_norm_out:1.9406952857971191
+metrics/l2_ratio:0.03532848507165909
+metrics/mean_log10_feature_sparsity:-7.757194519042969
+Link to wandb Report: https://api.wandb.ai/links/kdt/jhxn4aup